criterion/benchmark_group.rs

use crate::analysis;
use crate::benchmark::PartialBenchmarkConfig;
use crate::connection::OutgoingMessage;
use crate::measurement::Measurement;
use crate::report::BenchmarkId as InternalBenchmarkId;
use crate::report::Report;
use crate::report::ReportContext;
use crate::routine::{Function, Routine};
use crate::{Bencher, Criterion, Mode, PlotConfiguration, SamplingMode, Throughput};
use std::time::Duration;

/// Structure used to group together a set of related benchmarks, along with custom configuration
/// settings for groups of benchmarks. All benchmarks performed using a benchmark group will be
/// grouped together in the final report.
///
/// # Examples:
///
/// ```no_run
/// #[macro_use] extern crate criterion;
/// use self::criterion::*;
/// use std::time::Duration;
///
/// fn bench_simple(c: &mut Criterion) {
///     let mut group = c.benchmark_group("My Group");
///
///     // Now we can perform benchmarks with this group
///     group.bench_function("Bench 1", |b| b.iter(|| 1 ));
///     group.bench_function("Bench 2", |b| b.iter(|| 2 ));
///
///     // It's recommended to call group.finish() explicitly at the end, but if you don't it will
///     // be called automatically when the group is dropped.
///     group.finish();
/// }
///
/// fn bench_nested(c: &mut Criterion) {
///     let mut group = c.benchmark_group("My Second Group");
///     // We can override the configuration on a per-group level
///     group.measurement_time(Duration::from_secs(1));
///
///     // We can also use loops to define multiple benchmarks, even over multiple dimensions.
///     for x in 0..3 {
///         for y in 0..3 {
///             let point = (x, y);
///             let parameter_string = format!("{} * {}", x, y);
///             group.bench_with_input(BenchmarkId::new("Multiply", parameter_string), &point,
///                 |b, (p_x, p_y)| b.iter(|| p_x * p_y));
///         }
///     }
///
///     group.finish();
/// }
///
/// fn bench_throughput(c: &mut Criterion) {
///     let mut group = c.benchmark_group("Summation");
///
///     for size in [1024, 2048, 4096].iter() {
///         // Generate input of an appropriate size...
///         let input = vec![1u64; *size as usize];
///
///         // We can use the throughput function to tell Criterion.rs how large the input is
///         // so it can calculate the overall throughput of the function. If we wanted, we could
///         // even change the benchmark configuration for different inputs (eg. to reduce the
///         // number of samples for extremely large and slow inputs) or even different functions.
///         group.throughput(Throughput::Elements(*size as u64));
///
///         group.bench_with_input(BenchmarkId::new("sum", *size), &input,
///             |b, i| b.iter(|| i.iter().sum::<u64>()));
///         group.bench_with_input(BenchmarkId::new("fold", *size), &input,
///             |b, i| b.iter(|| i.iter().fold(0u64, |a, b| a + b)));
///     }
///
///     group.finish();
/// }
///
/// criterion_group!(benches, bench_simple, bench_nested, bench_throughput);
/// criterion_main!(benches);
/// ```
pub struct BenchmarkGroup<'a, M: Measurement> {
    criterion: &'a mut Criterion<M>,
    group_name: String,
    all_ids: Vec<InternalBenchmarkId>,
    any_matched: bool,
    partial_config: PartialBenchmarkConfig,
    throughput: Option<Throughput>,
}
impl<'a, M: Measurement> BenchmarkGroup<'a, M> {
    /// Changes the size of the sample for this benchmark
    ///
    /// A bigger sample should yield more accurate results if paired with a sufficiently large
    /// measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10.
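    ///
    /// # Examples
    ///
    /// A minimal sketch of raising the sample size for one group; the group and benchmark
    /// names here are only illustrative.
    ///
    /// ```no_run
    /// use criterion::Criterion;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("sample-size-example");
    /// // Collect 200 samples instead of the default 100.
    /// group.sample_size(200);
    /// group.bench_function("noop", |b| b.iter(|| 1 + 1));
    /// group.finish();
    /// ```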
    pub fn sample_size(&mut self, n: usize) -> &mut Self {
        assert!(n >= 10);

        self.partial_config.sample_size = Some(n);
        self
    }

    /// Changes the warm up time for this benchmark
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn warm_up_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.warm_up_time = Some(dur);
        self
    }

    /// Changes the target measurement time for this benchmark group.
    ///
    /// Criterion will attempt to spend approximately this amount of time measuring each
    /// benchmark on a best-effort basis. If it is not possible to perform the measurement in
    /// the requested time (eg. because each iteration of the benchmark is long) then Criterion
    /// will spend as long as is needed to collect the desired number of samples. With a longer
    /// time, the measurement will become more resilient to interference from other programs.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
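    ///
    /// # Examples
    ///
    /// A minimal sketch of lengthening the measurement window for a noisy benchmark; the
    /// group name and the measured closure are only placeholders.
    ///
    /// ```no_run
    /// use criterion::Criterion;
    /// use std::time::Duration;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("measurement-time-example");
    /// // Spend roughly ten seconds measuring each benchmark in this group.
    /// group.measurement_time(Duration::from_secs(10));
    /// group.bench_function("noop", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```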
    pub fn measurement_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.measurement_time = Some(dur);
        self
    }

    /// Changes the number of resamples for this benchmark group
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors which are inherent to the
    /// bootstrap method, but also increases the analysis time.
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
    pub fn nresamples(&mut self, n: usize) -> &mut Self {
        assert!(n > 0);
        if n <= 1000 {
            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.partial_config.nresamples = Some(n);
        self
    }

    /// Changes the noise threshold for benchmarks in this group. The noise threshold
    /// is used to filter out small changes in performance from one run to the next, even if they
    /// are statistically significant. Sometimes benchmarking the same code twice will result in
    /// small but statistically significant differences solely because of noise. This provides a way
    /// to filter out some of these false positives at the cost of making it harder to detect small
    /// changes to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
    pub fn noise_threshold(&mut self, threshold: f64) -> &mut Self {
        assert!(threshold >= 0.0);

        self.partial_config.noise_threshold = Some(threshold);
        self
    }

    /// Changes the confidence level for benchmarks in this group. The confidence
    /// level is the desired probability that the true runtime lies within the estimated
    /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)` range
    pub fn confidence_level(&mut self, cl: f64) -> &mut Self {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
        }

        self.partial_config.confidence_level = Some(cl);
        self
    }

    /// Changes the [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks in this group. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured performance of the last run.
    /// The significance level is the desired probability that two measurements of identical code
    /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as different due to
    /// noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
    /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
    /// detect small but real changes in performance. By setting the significance level
    /// closer to 1.0, Criterion.rs will be better able to detect small but genuine changes,
    /// but will also report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)` range
    pub fn significance_level(&mut self, sl: f64) -> &mut Self {
        assert!(sl > 0.0 && sl < 1.0);

        self.partial_config.significance_level = Some(sl);
        self
    }

    /// Changes the plot configuration for this benchmark group.
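    ///
    /// # Examples
    ///
    /// A minimal sketch of switching the summary plots to a logarithmic scale; the group
    /// name is only a placeholder.
    ///
    /// ```no_run
    /// use criterion::{AxisScale, Criterion, PlotConfiguration};
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("plot-config-example");
    /// // Use a logarithmic scale for the summary plots of this group.
    /// group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
    /// group.bench_function("noop", |b| b.iter(|| 1 + 1));
    /// group.finish();
    /// ```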
    pub fn plot_config(&mut self, new_config: PlotConfiguration) -> &mut Self {
        self.partial_config.plot_config = new_config;
        self
    }

    /// Set the input size for this benchmark group. Used for reporting the
    /// throughput.
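    ///
    /// # Examples
    ///
    /// A minimal sketch of reporting byte throughput for differently sized inputs; the group
    /// name and the input sizes are only illustrative.
    ///
    /// ```no_run
    /// use criterion::{BenchmarkId, Criterion, Throughput};
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("throughput-example");
    /// for size in [1024usize, 4096].iter() {
    ///     let input = vec![0u8; *size];
    ///     // Report throughput in bytes processed per iteration.
    ///     group.throughput(Throughput::Bytes(*size as u64));
    ///     group.bench_with_input(BenchmarkId::from_parameter(size), &input, |b, data| {
    ///         b.iter(|| data.iter().map(|&x| x as u64).sum::<u64>())
    ///     });
    /// }
    /// group.finish();
    /// ```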
    pub fn throughput(&mut self, throughput: Throughput) -> &mut Self {
        self.throughput = Some(throughput);
        self
    }

    /// Set the sampling mode for this benchmark group.
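    ///
    /// # Examples
    ///
    /// A minimal sketch of opting into flat sampling for a long-running benchmark; the group
    /// name and the sleep duration are only placeholders.
    ///
    /// ```no_run
    /// use criterion::{Criterion, SamplingMode};
    /// use std::time::Duration;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("sampling-mode-example");
    /// // Flat sampling is intended for benchmarks where each iteration is comparatively slow.
    /// group.sampling_mode(SamplingMode::Flat);
    /// group.bench_function("slow", |b| b.iter(|| std::thread::sleep(Duration::from_millis(10))));
    /// group.finish();
    /// ```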
    pub fn sampling_mode(&mut self, new_mode: SamplingMode) -> &mut Self {
        self.partial_config.sampling_mode = Some(new_mode);
        self
    }

    pub(crate) fn new(criterion: &mut Criterion<M>, group_name: String) -> BenchmarkGroup<'_, M> {
        BenchmarkGroup {
            criterion,
            group_name,
            all_ids: vec![],
            any_matched: false,
            partial_config: PartialBenchmarkConfig::default(),
            throughput: None,
        }
    }

    /// Benchmark the given parameterless function inside this benchmark group.
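    ///
    /// # Examples
    ///
    /// A minimal sketch of benchmarking a closure with no input; the group and benchmark
    /// names are only illustrative.
    ///
    /// ```no_run
    /// use criterion::Criterion;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("bench-function-example");
    /// group.bench_function("sum 1..100", |b| b.iter(|| (1u64..100).sum::<u64>()));
    /// group.finish();
    /// ```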
    pub fn bench_function<ID: IntoBenchmarkId, F>(&mut self, id: ID, mut f: F) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        self.run_bench(id.into_benchmark_id(), &(), |b, _| f(b));
        self
    }

    /// Benchmark the given parameterized function inside this benchmark group.
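    ///
    /// # Examples
    ///
    /// A minimal sketch of passing a pre-built input to the benchmarked closure; the group
    /// name, benchmark ID, and input are only placeholders.
    ///
    /// ```no_run
    /// use criterion::{BenchmarkId, Criterion};
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("bench-with-input-example");
    /// let input = vec![1u64; 1000];
    /// group.bench_with_input(BenchmarkId::new("sum", "1000 ones"), &input, |b, data| {
    ///     b.iter(|| data.iter().sum::<u64>())
    /// });
    /// group.finish();
    /// ```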
    pub fn bench_with_input<ID: IntoBenchmarkId, F, I>(
        &mut self,
        id: ID,
        input: &I,
        f: F,
    ) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        self.run_bench(id.into_benchmark_id(), input, f);
        self
    }

    fn run_bench<F, I>(&mut self, id: BenchmarkId, input: &I, f: F)
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        let config = self.partial_config.to_complete(&self.criterion.config);
        let report_context = ReportContext {
            output_directory: self.criterion.output_directory.clone(),
            plot_config: self.partial_config.plot_config.clone(),
        };

        let mut id = InternalBenchmarkId::new(
            self.group_name.clone(),
            id.function_name,
            id.parameter,
            self.throughput.clone(),
        );

        assert!(
            !self.all_ids.contains(&id),
            "Benchmark IDs must be unique within a group. Encountered duplicated benchmark ID {}",
            &id
        );

        id.ensure_directory_name_unique(&self.criterion.all_directories);
        self.criterion
            .all_directories
            .insert(id.as_directory_name().to_owned());
        id.ensure_title_unique(&self.criterion.all_titles);
        self.criterion.all_titles.insert(id.as_title().to_owned());

        let do_run = self.criterion.filter_matches(id.id());
        self.any_matched |= do_run;
        let mut func = Function::new(f);

        match &self.criterion.mode {
            Mode::Benchmark => {
                if let Some(conn) = &self.criterion.connection {
                    if do_run {
                        conn.send(&OutgoingMessage::BeginningBenchmark { id: (&id).into() })
                            .unwrap();
                    } else {
                        conn.send(&OutgoingMessage::SkippingBenchmark { id: (&id).into() })
                            .unwrap();
                    }
                }
                if do_run {
                    analysis::common(
                        &id,
                        &mut func,
                        &config,
                        self.criterion,
                        &report_context,
                        input,
                        self.throughput.clone(),
                    );
                }
            }
            Mode::List(_) => {
                if do_run {
                    println!("{}: benchmark", id);
                }
            }
            Mode::Test => {
                if do_run {
                    // In test mode, run the benchmark exactly once, then exit.
                    self.criterion.report.test_start(&id, &report_context);
                    func.test(&self.criterion.measurement, input);
                    self.criterion.report.test_pass(&id, &report_context);
                }
            }
            &Mode::Profile(duration) => {
                if do_run {
                    func.profile(
                        &self.criterion.measurement,
                        &id,
                        self.criterion,
                        &report_context,
                        duration,
                        input,
                    );
                }
            }
        }

        self.all_ids.push(id);
    }

    /// Consume the benchmark group and generate the summary reports for the group.
    ///
    /// It is recommended to call this explicitly, but if you forget it will be called when the
    /// group is dropped.
    pub fn finish(self) {
        ::std::mem::drop(self);
    }
}
impl<'a, M: Measurement> Drop for BenchmarkGroup<'a, M> {
    fn drop(&mut self) {
        // I don't really like having a bunch of non-trivial code in drop, but this is the only way
        // to really write linear types like this in Rust...
        if let Some(conn) = &mut self.criterion.connection {
            conn.send(&OutgoingMessage::FinishedBenchmarkGroup {
                group: &self.group_name,
            })
            .unwrap();

            conn.serve_value_formatter(self.criterion.measurement.formatter())
                .unwrap();
        }

        if self.all_ids.len() > 1 && self.any_matched && self.criterion.mode.is_benchmark() {
            let report_context = ReportContext {
                output_directory: self.criterion.output_directory.clone(),
                plot_config: self.partial_config.plot_config.clone(),
            };

            self.criterion.report.summarize(
                &report_context,
                &self.all_ids,
                self.criterion.measurement.formatter(),
            );
        }
        if self.any_matched && !self.criterion.mode.is_terse() {
            self.criterion.report.group_separator();
        }
    }
}

/// Simple structure representing an ID for a benchmark. The ID must be unique within a benchmark
/// group.
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct BenchmarkId {
    pub(crate) function_name: Option<String>,
    pub(crate) parameter: Option<String>,
}
impl BenchmarkId {
    /// Construct a new benchmark ID from a string function name and a parameter value.
    ///
    /// Note that the parameter value need not be the same as the parameter passed to your
    /// actual benchmark. For instance, you might have a benchmark that takes a 1MB string as
    /// input. It would be impractical to embed the whole string in the benchmark ID, so instead
    /// your parameter value might be a descriptive string like "1MB Alphanumeric".
    ///
    /// # Examples
    /// ```
    /// # use criterion::{BenchmarkId, Criterion};
    /// // A basic benchmark ID is typically constructed from a constant string and a simple
    /// // parameter
    /// let basic_id = BenchmarkId::new("my_id", 5);
    ///
    /// // The function name can be a string
    /// let function_name = "test_string".to_string();
    /// let string_id = BenchmarkId::new(function_name, 12);
    ///
    /// // Benchmark IDs are passed to benchmark groups:
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("My Group");
    /// // Generate a very large input
    /// let input: String = ::std::iter::repeat("X").take(1024 * 1024).collect();
    ///
    /// // Note that we don't have to use the input as the parameter in the ID
    /// group.bench_with_input(BenchmarkId::new("Test long string", "1MB X's"), &input, |b, i| {
    ///     b.iter(|| i.len())
    /// });
    /// ```
    pub fn new<S: Into<String>, P: ::std::fmt::Display>(
        function_name: S,
        parameter: P,
    ) -> BenchmarkId {
        BenchmarkId {
            function_name: Some(function_name.into()),
            parameter: Some(format!("{}", parameter)),
        }
    }

    /// Construct a new benchmark ID from just a parameter value. Use this when benchmarking a
    /// single function with a variety of different inputs.
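    ///
    /// # Examples
    ///
    /// A minimal sketch of identifying benchmarks purely by their input; the group name and
    /// the sizes are only illustrative.
    ///
    /// ```no_run
    /// use criterion::{BenchmarkId, Criterion};
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("from-parameter-example");
    /// for size in [16u64, 256, 4096].iter() {
    ///     // Only one function is being benchmarked, so the parameter alone identifies it.
    ///     group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &s| {
    ///         b.iter(|| (0..s).sum::<u64>())
    ///     });
    /// }
    /// group.finish();
    /// ```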
    pub fn from_parameter<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: Some(format!("{}", parameter)),
        }
    }

    pub(crate) fn no_function() -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: None,
        }
    }

    pub(crate) fn no_function_with_input<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: Some(format!("{}", parameter)),
        }
    }
}

mod private {
    pub trait Sealed {}
    impl Sealed for super::BenchmarkId {}
    impl<S: Into<String>> Sealed for S {}
}

/// Sealed trait which allows users to automatically convert strings to benchmark IDs.
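///
/// # Examples
///
/// A minimal sketch of the two forms this conversion accepts; the group and benchmark names
/// are only illustrative.
///
/// ```no_run
/// use criterion::{BenchmarkId, Criterion};
///
/// let mut criterion = Criterion::default();
/// let mut group = criterion.benchmark_group("into-benchmark-id-example");
/// // A plain string is converted into a BenchmarkId with no parameter...
/// group.bench_function("from a string", |b| b.iter(|| 1 + 1));
/// // ...and an explicit BenchmarkId is passed through unchanged.
/// group.bench_function(BenchmarkId::new("explicit", 42), |b| b.iter(|| 2 + 2));
/// group.finish();
/// ```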
pub trait IntoBenchmarkId: private::Sealed {
    fn into_benchmark_id(self) -> BenchmarkId;
}
impl IntoBenchmarkId for BenchmarkId {
    fn into_benchmark_id(self) -> BenchmarkId {
        self
    }
}
impl<S: Into<String>> IntoBenchmarkId for S {
    fn into_benchmark_id(self) -> BenchmarkId {
        let function_name = self.into();
        assert!(
            !function_name.is_empty(),
            "Function name must not be empty."
        );

        BenchmarkId {
            function_name: Some(function_name),
            parameter: None,
        }
    }
}