// criterion/bencher.rs

1use std::iter::IntoIterator;
2use std::time::Duration;
3use std::time::Instant;
4
5use crate::black_box;
6use crate::measurement::{Measurement, WallTime};
7use crate::BatchSize;
8
9#[cfg(feature = "async")]
10use std::future::Future;
11
12#[cfg(feature = "async")]
13use crate::async_executor::AsyncExecutor;
14
15// ================================== MAINTENANCE NOTE =============================================
16// Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
17// ================================== MAINTENANCE NOTE =============================================
18
/// Timer struct used to iterate a benchmarked function and measure the runtime.
///
/// This struct provides different timing loops as methods. Each timing loop provides a different
/// way to time a routine and each has advantages and disadvantages.
///
/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count
///   to a separate process), use `iter_custom`.
/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
///   method, use `iter_with_large_drop`.
/// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched`
///   or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes.
///   If the setup value implements `Drop` and you don't want to include the `drop` time in the
///   measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also
///   suitable for benchmarking routines which return a value with an expensive `drop` method,
///   but are more complex than `iter_with_large_drop`.
/// * Otherwise, use `iter`.
pub struct Bencher<'a, M: Measurement = WallTime> {
    pub(crate) iterated: bool,         // Have we iterated this benchmark? (checked and reset by `assert_iterated`)
    pub(crate) iters: u64,             // Number of times to iterate this benchmark
    pub(crate) value: M::Value,        // The measured value accumulated by the timing loops
    pub(crate) measurement: &'a M,     // Reference to the measurement object (e.g. WallTime)
    pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.
}
impl<'a, M: Measurement> Bencher<'a, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// // The function to benchmark
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter(|| foo())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        self.iterated = true;
        let time_start = Instant::now();
        let start = self.measurement.start();
        for _ in 0..self.iters {
            // black_box keeps the optimizer from discarding the routine's result.
            black_box(routine());
        }
        self.value = self.measurement.end(start);
        // Wall-clock time for the whole call; used by the warmup logic.
        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use std::time::Instant;
    ///
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter_custom(|iters| {
    ///             let start = Instant::now();
    ///             for _i in 0..iters {
    ///                 black_box(foo());
    ///             }
    ///             start.elapsed()
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> M::Value,
    {
        self.iterated = true;
        let time_start = Instant::now();
        // The routine is responsible for iterating `self.iters` times and measuring itself.
        self.value = routine(self.iters);
        self.elapsed_time = time_start.elapsed();
    }

    // Legacy entry point; equivalent to iter_batched with BatchSize::PerIteration.
    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.iter_with_large_drop(|| create_vector())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        // Delegates to iter_batched with a unit setup; outputs are collected in batches
        // and dropped outside the measured region.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero.");
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Special case: each iteration is measured individually, so no batch
            // vectors need to be allocated.
            for _ in 0..self.iters {
                let input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Output is dropped here, after the measured region.
                drop(black_box(output));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller than the configured batch size.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Inputs are created before the measured region so setup is not timed.
                let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                outputs.extend(inputs.into_iter().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Outputs are dropped here, after the measured region.
                black_box(outputs);

                iteration_counter += batch_size;
            }
        }

        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched_ref(|| data.clone(), |data| sort(data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero.");
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Special case: each iteration is measured individually, so no batch
            // vectors need to be allocated.
            for _ in 0..self.iters {
                let mut input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(&mut input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Output and input are dropped here, after the measured region.
                drop(black_box(output));
                drop(black_box(input));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller than the configured batch size.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Inputs are created before the measured region so setup is not timed;
                // they are borrowed (iter_mut below), so their drop is also untimed.
                let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                outputs.extend(inputs.iter_mut().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Outputs are dropped here, after the measured region.
                black_box(outputs);

                iteration_counter += batch_size;
            }
        }
        self.elapsed_time = time_start.elapsed();
    }

    // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
    // if they don't. The flag is reset so the same Bencher can be checked again on the next run.
    pub(crate) fn assert_iterated(&mut self) {
        assert!(
            self.iterated,
            "Benchmark function must call Bencher::iter or related method."
        );
        self.iterated = false;
    }

    /// Convert this bencher into an AsyncBencher, which enables async/await support.
    #[cfg(feature = "async")]
    pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
        AsyncBencher { b: self, runner }
    }
}
387
/// Async/await variant of the Bencher struct.
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // Underlying synchronous Bencher whose state the timing loops update
    runner: A,                 // Executor used to block on the async timing loops
}
#[cfg(feature = "async")]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        // The whole timing loop runs as one future on the user-supplied executor.
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                // black_box keeps the optimizer from discarding the routine's result.
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            // The routine is responsible for iterating `b.iters` times and measuring itself.
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    // Legacy entry point; equivalent to iter_batched with BatchSize::PerIteration.
    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        // Delegates to iter_batched with a unit setup; outputs are collected in batches
        // and dropped outside the measured region.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    // Legacy entry point; equivalent to iter_batched with BatchSize::NumBatches(1).
    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: each iteration is measured individually, so no batch
                // vectors need to be allocated.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Output is dropped here, after the measured region.
                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Inputs are created before the measured region so setup is not timed.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does, because each
                    // routine call yields a future that must be .await-ed in turn.
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Outputs are dropped here, after the measured region.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched_ref(|| data.clone(), |data| async move { sort(data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: each iteration is measured individually, so no batch
                // vectors need to be allocated.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Output and input are dropped here, after the measured region.
                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Inputs are created before the measured region so setup is not timed.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does, because each
                    // routine call yields a future that must be .await-ed in turn.
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Outputs are dropped here, after the measured region.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}