criterion/stats/univariate/
sample.rs

1use std::{mem, ops};
2
3use crate::stats::float::Float;
4use crate::stats::tuple::{Tuple, TupledDistributionsBuilder};
5use crate::stats::univariate::Percentiles;
6use crate::stats::univariate::Resamples;
7#[cfg(feature = "rayon")]
8use rayon::prelude::*;
9
10/// A collection of data points drawn from a population
11///
12/// Invariants:
13///
14/// - The sample contains at least 2 data points
15/// - The sample contains no `NaN`s
16#[repr(transparent)]
17pub struct Sample<A>([A]);
18
19// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module
20impl<A> Sample<A>
21where
22    A: Float,
23{
24    /// Creates a new sample from an existing slice
25    ///
26    /// # Panics
27    ///
28    /// Panics if `slice` contains any `NaN` or if `slice` has less than two elements
29    #[cfg_attr(feature = "cargo-clippy", allow(clippy::new_ret_no_self))]
30    pub fn new(slice: &[A]) -> &Sample<A> {
31        assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan()));
32
33        unsafe { mem::transmute(slice) }
34    }
35
36    /// Returns the biggest element in the sample
37    ///
38    /// - Time: `O(length)`
39    pub fn max(&self) -> A {
40        let mut elems = self.iter();
41
42        match elems.next() {
43            Some(&head) => elems.fold(head, |a, &b| a.max(b)),
44            // NB `unreachable!` because `Sample` is guaranteed to have at least one data point
45            None => unreachable!(),
46        }
47    }
48
49    /// Returns the arithmetic average of the sample
50    ///
51    /// - Time: `O(length)`
52    pub fn mean(&self) -> A {
53        let n = self.len();
54
55        self.sum() / A::cast(n)
56    }
57
58    /// Returns the median absolute deviation
59    ///
60    /// The `median` can be optionally passed along to speed up (2X) the computation
61    ///
62    /// - Time: `O(length)`
63    /// - Memory: `O(length)`
64    pub fn median_abs_dev(&self, median: Option<A>) -> A
65    where
66        usize: cast::From<A, Output = Result<usize, cast::Error>>,
67    {
68        let median = median.unwrap_or_else(|| self.percentiles().median());
69
70        // NB Although this operation can be SIMD accelerated, the gain is negligible because the
71        // bottle neck is the sorting operation which is part of the computation of the median
72        let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::<Vec<_>>();
73
74        let abs_devs: &Self = Self::new(&abs_devs);
75
76        abs_devs.percentiles().median() * A::cast(1.4826)
77    }
78
79    /// Returns the median absolute deviation as a percentage of the median
80    ///
81    /// - Time: `O(length)`
82    /// - Memory: `O(length)`
83    pub fn median_abs_dev_pct(&self) -> A
84    where
85        usize: cast::From<A, Output = Result<usize, cast::Error>>,
86    {
87        let _100 = A::cast(100);
88        let median = self.percentiles().median();
89        let mad = self.median_abs_dev(Some(median));
90
91        (mad / median) * _100
92    }
93
94    /// Returns the smallest element in the sample
95    ///
96    /// - Time: `O(length)`
97    pub fn min(&self) -> A {
98        let mut elems = self.iter();
99
100        match elems.next() {
101            Some(&elem) => elems.fold(elem, |a, &b| a.min(b)),
102            // NB `unreachable!` because `Sample` is guaranteed to have at least one data point
103            None => unreachable!(),
104        }
105    }
106
107    /// Returns a "view" into the percentiles of the sample
108    ///
109    /// This "view" makes consecutive computations of percentiles much faster (`O(1)`)
110    ///
111    /// - Time: `O(N log N) where N = length`
112    /// - Memory: `O(length)`
113    pub fn percentiles(&self) -> Percentiles<A>
114    where
115        usize: cast::From<A, Output = Result<usize, cast::Error>>,
116    {
117        use std::cmp::Ordering;
118
119        // NB This function assumes that there are no `NaN`s in the sample
120        fn cmp<T>(a: &T, b: &T) -> Ordering
121        where
122            T: PartialOrd,
123        {
124            match a.partial_cmp(b) {
125                Some(o) => o,
126                // Arbitrary way to handle NaNs that should never happen
127                None => Ordering::Equal,
128            }
129        }
130
131        let mut v = self.to_vec().into_boxed_slice();
132        #[cfg(feature = "rayon")]
133        v.par_sort_unstable_by(cmp);
134        #[cfg(not(feature = "rayon"))]
135        v.sort_unstable_by(cmp);
136
137        // NB :-1: to intra-crate privacy rules
138        unsafe { mem::transmute(v) }
139    }
140
141    /// Returns the standard deviation of the sample
142    ///
143    /// The `mean` can be optionally passed along to speed up (2X) the computation
144    ///
145    /// - Time: `O(length)`
146    pub fn std_dev(&self, mean: Option<A>) -> A {
147        self.var(mean).sqrt()
148    }
149
150    /// Returns the standard deviation as a percentage of the mean
151    ///
152    /// - Time: `O(length)`
153    pub fn std_dev_pct(&self) -> A {
154        let _100 = A::cast(100);
155        let mean = self.mean();
156        let std_dev = self.std_dev(Some(mean));
157
158        (std_dev / mean) * _100
159    }
160
161    /// Returns the sum of all the elements of the sample
162    ///
163    /// - Time: `O(length)`
164    pub fn sum(&self) -> A {
165        crate::stats::sum(self)
166    }
167
168    /// Returns the t score between these two samples
169    ///
170    /// - Time: `O(length)`
171    pub fn t(&self, other: &Sample<A>) -> A {
172        let (x_bar, y_bar) = (self.mean(), other.mean());
173        let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar)));
174        let n_x = A::cast(self.len());
175        let n_y = A::cast(other.len());
176        let num = x_bar - y_bar;
177        let den = (s2_x / n_x + s2_y / n_y).sqrt();
178
179        num / den
180    }
181
182    /// Returns the variance of the sample
183    ///
184    /// The `mean` can be optionally passed along to speed up (2X) the computation
185    ///
186    /// - Time: `O(length)`
187    pub fn var(&self, mean: Option<A>) -> A {
188        use std::ops::Add;
189
190        let mean = mean.unwrap_or_else(|| self.mean());
191        let slice = self;
192
193        let sum = slice
194            .iter()
195            .map(|&x| (x - mean).powi(2))
196            .fold(A::cast(0), Add::add);
197
198        sum / A::cast(slice.len() - 1)
199    }
200
201    // TODO Remove the `T` parameter in favor of `S::Output`
202    /// Returns the bootstrap distributions of the parameters estimated by the 1-sample statistic
203    ///
204    /// - Multi-threaded
205    /// - Time: `O(nresamples)`
206    /// - Memory: `O(nresamples)`
207    pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions
208    where
209        S: Fn(&Sample<A>) -> T + Sync,
210        T: Tuple + Send,
211        T::Distributions: Send,
212        T::Builder: Send,
213    {
214        #[cfg(feature = "rayon")]
215        {
216            (0..nresamples)
217                .into_par_iter()
218                .map_init(
219                    || Resamples::new(self),
220                    |resamples, _| statistic(resamples.next()),
221                )
222                .fold(
223                    || T::Builder::new(0),
224                    |mut sub_distributions, sample| {
225                        sub_distributions.push(sample);
226                        sub_distributions
227                    },
228                )
229                .reduce(
230                    || T::Builder::new(0),
231                    |mut a, mut b| {
232                        a.extend(&mut b);
233                        a
234                    },
235                )
236                .complete()
237        }
238        #[cfg(not(feature = "rayon"))]
239        {
240            let mut resamples = Resamples::new(self);
241            (0..nresamples)
242                .map(|_| statistic(resamples.next()))
243                .fold(T::Builder::new(0), |mut sub_distributions, sample| {
244                    sub_distributions.push(sample);
245                    sub_distributions
246                })
247                .complete()
248        }
249    }
250
251    #[cfg(test)]
252    pub fn iqr(&self) -> A
253    where
254        usize: cast::From<A, Output = Result<usize, cast::Error>>,
255    {
256        self.percentiles().iqr()
257    }
258
259    #[cfg(test)]
260    pub fn median(&self) -> A
261    where
262        usize: cast::From<A, Output = Result<usize, cast::Error>>,
263    {
264        self.percentiles().median()
265    }
266}
267
268impl<A> ops::Deref for Sample<A> {
269    type Target = [A];
270
271    fn deref(&self) -> &[A] {
272        &self.0
273    }
274}