// criterion/routine.rs

1use {
2    crate::{
3        benchmark::BenchmarkConfig,
4        connection::OutgoingMessage,
5        measurement::Measurement,
6        report::{BenchmarkId, Report, ReportContext},
7        ActualSamplingMode, Bencher, Criterion,
8    },
9    std::{hint::black_box, marker::PhantomData, time::Duration},
10};
11
12/// PRIVATE
/// PRIVATE: the internal interface between `Criterion` and anything that can
/// be benchmarked. The only implementor in this file is `Function`, which
/// adapts a user-supplied closure.
pub(crate) trait Routine<M: Measurement, T: ?Sized> {
    /// PRIVATE: run one batch of the routine per entry of `iters` (each entry
    /// is that batch's iteration count) and return one measured value per
    /// batch, converted to `f64` by the `Measurement`.
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64>;
    /// PRIVATE: run the routine repeatedly for at least `how_long`, doubling
    /// the batch size each pass (see `Function::warm_up`); returns
    /// `(elapsed_nanos, total_iterations)`, which callers use to estimate the
    /// mean time per iteration.
    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64);

    /// PRIVATE: smoke-test the routine by executing exactly one iteration.
    fn test(&mut self, m: &M, parameter: &T) {
        self.bench(m, &[1u64], parameter);
    }

    /// Iterates the benchmarked function for a fixed length of time, but takes no measurements.
    /// This keeps the overall benchmark suite runtime constant-ish even when running under a
    /// profiler with an unknown amount of overhead. Since no measurements are taken, it also
    /// reduces the amount of time the execution spends in Criterion.rs code, which should help
    /// show the performance of the benchmarked code more clearly as well.
    fn profile(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        time: Duration,
        parameter: &T,
    ) {
        criterion
            .report
            .profile(id, report_context, time.as_nanos() as f64);

        let mut profile_path = report_context.output_directory.clone();
        if crate::cargo_criterion_connection().is_some() {
            // If connected to cargo-criterion, generate a cargo-criterion-style path
            // (".../profile/<id>" rather than ".../<id>/profile").
            // This is kind of a hack.
            profile_path.push("profile");
            profile_path.push(id.as_directory_name());
        } else {
            profile_path.push(id.as_directory_name());
            profile_path.push("profile");
        }
        criterion
            .profiler
            .borrow_mut()
            .start_profiling(id.id(), &profile_path);

        // Total profiling budget in nanoseconds (shadows the `Duration` argument).
        let time = time.as_nanos() as u64;

        // TODO: Some profilers will show the two batches of iterations as
        // being different code-paths even though they aren't really.

        // Get the warmup time for one second
        let (wu_elapsed, wu_iters) = self.warm_up(measurement, Duration::from_secs(1), parameter);
        if wu_elapsed < time {
            // Initial guess for the mean execution time
            let met = wu_elapsed as f64 / wu_iters as f64;

            // Guess how many iterations will be required for the remaining time
            let remaining = (time - wu_elapsed) as f64;

            // NOTE(review): if `met` exceeds the remaining budget this truncates
            // to a batch of 0 iterations — presumably harmless; confirm against
            // the `Bencher` iteration contract.
            let iters = remaining / met;
            let iters = iters as u64;

            self.bench(measurement, &[iters], parameter);
        }

        criterion
            .profiler
            .borrow_mut()
            .stop_profiling(id.id(), &profile_path);

        criterion.report.terminated(id, report_context);
    }

    /// PRIVATE: warm up and collect the measurement samples for this routine.
    ///
    /// Returns the sampling mode actually used plus two parallel arrays:
    /// per-sample iteration counts and the corresponding measured values.
    fn sample(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        config: &BenchmarkConfig,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        parameter: &T,
    ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) {
        // Quick mode: instead of the full warmup + sampling plan below, double
        // the batch size until the measurement looks stable, then return just
        // the last two batches as the sample set.
        if config.quick_mode {
            let minimum_bench_duration = Duration::from_millis(100);
            let maximum_bench_duration = config.measurement_time; // default: 5 seconds
            let target_rel_stdev = config.significance_level; // default: 5%, 0.05

            use std::time::Instant;
            let time_start = Instant::now();

            let sq = |val| val * val;
            // Batch size (iteration count); doubled on each pass of the loop below.
            let mut n = 1;
            let mut t_prev = *self.bench(measurement, &[n], parameter).first().unwrap();

            // Early exit for extremely long running benchmarks:
            if time_start.elapsed() > maximum_bench_duration {
                let iters = vec![n as f64, n as f64].into_boxed_slice();
                // prevent plotting bug where KDE estimation results in NaN when all values are equal because
                // the stddev is 0.
                let elapsed = vec![t_prev, t_prev.next_up()].into_boxed_slice();
                return (ActualSamplingMode::Flat, iters, elapsed);
            }

            // Main data collection loop.
            loop {
                let t_now = *self
                    .bench(measurement, &[n * 2], parameter)
                    .first()
                    .unwrap();
                // Least-squares fit of batch time against batch size: with
                // batches of n and 2n iterations, the fitted time for an
                // n-iteration batch is (1*t_prev + 2*t_now) / (1^2 + 2^2).
                let t = (t_prev + 2. * t_now) / 5.;
                // Residual norm of that fit, used as a stdev-like noise estimate.
                let stdev = (sq(t_prev - t) + sq(t_now - 2. * t)).sqrt();
                // println!("Sample: {} {:.2}", n, stdev / t);
                let elapsed = time_start.elapsed();
                // Stop when the relative noise is below target (after a minimum
                // runtime so one lucky sample can't end the run), or when the
                // overall time budget is exhausted.
                if (stdev < target_rel_stdev * t && elapsed > minimum_bench_duration)
                    || elapsed > maximum_bench_duration
                {
                    let iters = vec![n as f64, (n * 2) as f64].into_boxed_slice();
                    let elapsed = vec![t_prev, t_now].into_boxed_slice();
                    return (ActualSamplingMode::Linear, iters, elapsed);
                }
                n *= 2;
                t_prev = t_now;
            }
        }
        let wu = config.warm_up_time;
        let m_ns = config.measurement_time.as_nanos();

        criterion
            .report
            .warmup(id, report_context, wu.as_nanos() as f64);

        // Mirror progress to cargo-criterion when connected.
        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::Warmup {
                id: id.into(),
                nanos: wu.as_nanos() as f64,
            })
            .unwrap();
        }

        let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu, parameter);
        if crate::debug_enabled() {
            println!(
                "\nCompleted {} iterations in {} nanoseconds, estimated execution time is {} ns",
                wu_iters,
                wu_elapsed,
                wu_elapsed as f64 / wu_iters as f64
            );
        }

        // Initial guess for the mean execution time
        let met = wu_elapsed as f64 / wu_iters as f64;

        // Number of samples to collect (from config).
        let n = config.sample_size as u64;

        let actual_sampling_mode = config
            .sampling_mode
            .choose_sampling_mode(met, n, m_ns as f64);

        // Per-sample iteration counts chosen by the sampling mode.
        let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time);

        // Estimated total measurement time, reported to the user below.
        let expected_ns = m_iters
            .iter()
            .copied()
            .map(|count| count as f64 * met)
            .sum();

        // Use saturating_add to handle overflow.
        let mut total_iters = 0u64;
        for count in m_iters.iter().copied() {
            total_iters = total_iters.saturating_add(count);
        }

        criterion
            .report
            .measurement_start(id, report_context, n, expected_ns, total_iters);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::MeasurementStart {
                id: id.into(),
                sample_count: n,
                estimate_ns: expected_ns,
                iter_count: total_iters,
            })
            .unwrap();
        }

        // Run all the measured batches in one call.
        let m_elapsed = self.bench(measurement, &m_iters, parameter);

        let m_iters_f: Vec<f64> = m_iters.iter().map(|&x| x as f64).collect();

        (
            actual_sampling_mode,
            m_iters_f.into_boxed_slice(),
            m_elapsed.into_boxed_slice(),
        )
    }
}
209
210pub struct Function<M: Measurement, F, T>
211where
212    F: FnMut(&mut Bencher<'_, M>, &T),
213    T: ?Sized,
214{
215    f: F,
216    // TODO: Is there some way to remove these?
217    _phantom: PhantomData<T>,
218    _phamtom2: PhantomData<M>,
219}
220impl<M: Measurement, F, T> Function<M, F, T>
221where
222    F: FnMut(&mut Bencher<'_, M>, &T),
223    T: ?Sized,
224{
225    pub fn new(f: F) -> Function<M, F, T> {
226        Function {
227            f,
228            _phantom: PhantomData,
229            _phamtom2: PhantomData,
230        }
231    }
232}
233
234impl<M: Measurement, F, T> Routine<M, T> for Function<M, F, T>
235where
236    F: FnMut(&mut Bencher<'_, M>, &T),
237    T: ?Sized,
238{
239    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64> {
240        let f = &mut self.f;
241
242        let mut b = Bencher {
243            iterated: false,
244            iters: 0,
245            value: m.zero(),
246            measurement: m,
247            elapsed_time: Duration::from_millis(0),
248        };
249
250        let mut results = Vec::with_capacity(iters.len());
251        results.resize(iters.len(), 0.0);
252        for (i, iters) in iters.iter().enumerate() {
253            #[cfg(any(target_family = "unix", target_family = "windows"))]
254            {
255                // Intentionally vary the stack allocation size to reduce measurement bias from
256                // memory alignment and cache effects.
257                // The shift can go up to a full page size suitable for the system.
258                alloca::with_alloca(
259                    i % page_size::get(), /* how many bytes we want to allocate */
260                    |_shifting_stack_space: &mut [core::mem::MaybeUninit<u8>] /* stack allocated slice itself */| {
261                        b.iters = *iters;
262                        (*f)(&mut b, black_box(parameter));
263                        b.assert_iterated();
264                        results[i] = m.to_f64(&b.value);
265                    },
266                );
267            }
268            #[cfg(not(any(target_family = "unix", target_family = "windows")))]
269            {
270                b.iters = *iters;
271                (*f)(&mut b, black_box(parameter));
272                b.assert_iterated();
273                results[i] = m.to_f64(&b.value);
274            }
275        }
276        results
277    }
278
279    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
280        let f = &mut self.f;
281        let mut b = Bencher {
282            iterated: false,
283            iters: 1,
284            value: m.zero(),
285            measurement: m,
286            elapsed_time: Duration::from_millis(0),
287        };
288
289        let mut total_iters = 0;
290        let mut elapsed_time = Duration::from_millis(0);
291        loop {
292            (*f)(&mut b, black_box(parameter));
293
294            b.assert_iterated();
295
296            total_iters += b.iters;
297            elapsed_time += b.elapsed_time;
298            if elapsed_time > how_long {
299                return (elapsed_time.as_nanos() as u64, total_iters);
300            }
301
302            b.iters = b.iters.wrapping_mul(2);
303        }
304    }
305}