hdrhistogram/iterators/
quantile.rs

1use crate::core::counter::Counter;
2use crate::iterators::{HistogramIterator, PickMetadata, PickyIterator};
3use crate::Histogram;
4
5/// An iterator that will yield at quantile steps through the histogram's value range.
6pub struct Iter<'a, T: 'a + Counter> {
7    hist: &'a Histogram<T>,
8    ticks_per_half_distance: u32,
9    quantile_to_iterate_to: f64,
10    reached_end: bool,
11}
12
13impl<'a, T: 'a + Counter> Iter<'a, T> {
14    /// Construct a new iterator. See `Histogram::iter_quantiles` for details.
15    pub fn new(
16        hist: &'a Histogram<T>,
17        ticks_per_half_distance: u32,
18    ) -> HistogramIterator<'a, T, Iter<'a, T>> {
19        assert!(
20            ticks_per_half_distance > 0,
21            "Ticks per half distance must be > 0"
22        );
23
24        HistogramIterator::new(
25            hist,
26            Iter {
27                hist,
28                ticks_per_half_distance,
29                quantile_to_iterate_to: 0.0,
30                reached_end: false,
31            },
32        )
33    }
34}
35
36impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
37    #[allow(clippy::float_cmp)]
38    fn pick(&mut self, _: usize, running_total: u64, count_at_index: T) -> Option<PickMetadata> {
39        if count_at_index == T::zero() {
40            return None;
41        }
42
43        // This calculation, combined with the `quantile * count` in `value_at_quantile`, tends
44        // to produce a count_at_quantile that is 1 ulp wrong. That's just the way IEEE754 works.
45        let current_quantile = running_total as f64 / self.hist.len() as f64;
46        if current_quantile < self.quantile_to_iterate_to {
47            return None;
48        }
49
50        // Because there are effectively two quantiles in play (the quantile of the value for the
51        // bucket we're at aka "value quantile", and the quantile we're iterating to aka "iteration
52        // quantile", which may be significantly different, especially in highly non-uniform
53        // distributions), the behavior around 1.0 is a little tricky.
54        //
55        // The desired behavior is that we always iterate until the iteration quantile reaches 1.0,
56        // but if the value quantile reaches 1.0 (by reaching the last index with a non-zero count)
57        // before that point, we skip the remaining intermediate iterations in that same index and
58        // jump straight to iteration quantile 1.0.
59        // This is effectively a policy decision, but it is consistent with other iterators: they
60        // don't just stop when the quantile reaches 1.0 upon first entering the final bucket.
61        // At the same time, it's arguably unhelpful to have a bunch of all-but-identical quantile
62        // ticks, hence skipping the intermediate iterations. (This is also how the Java impl
63        // behaves.)
64        //
65        // Note that it is impossible to have the value quantile lower than the iteration quantile
66        // since the value quantile incorporates the count for the entire bucket when it's first
67        // entered, while the hypothetical fractional count that the iteration quantile would use is
68        // necessarily less than that.
69        //
70        // Cases for ending iteration:
71        // 1. Iteration quantile reaches 1.0 along with the value quantile reaching 1.0 at the max
72        //    value index
73        // 2. Iteration quantile is below 1.0 when the value quantile reaches 1.0 at the max value
74        //    index
75        // 3. Same as #1, but not at the max value index because total count has saturated. This
76        //    means that more() will not be called.
77        // 4. Same as #2, but not at the max value index because total count has saturated. See #3.
78
79        if self.reached_end {
80            // #3, #4 part 2: Need to check here, not just in `more()`: when we see quantile 1.0 and
81            // set `reached_end`, `more()` will not be called (because we haven't reached the last
82            // non-zero-count index) so it can't stop iteration, and we must stop it here.
83            //
84            // This will be hit for all remaining non-zero-count indices, then control will proceed
85            // to `more()`.
86            return None;
87        }
88
89        // #1: If we reach iteration quantile 1.0 at the same time as value quantile 1.0 (because we
90        // moved to the final non-zero-count index exactly when the iteration ticked over to 1.0),
91        // we want to emit a value at that point, but not proceed past that.
92        // #2, last phase: This could also be the second visit to the max value index in the #2 case
93        // where `quantile_to_iterate_to` has been set to 1.0.
94        // #3, #4 last phase: Similar, but iteration proceeded normally up to 1.0 without any
95        // last-bucket skipping because it wasn't at the last bucket.
96        if self.quantile_to_iterate_to == 1.0 {
97            // We want to pick this value but not do the math below because it doesn't work when
98            // quantile >= 1.0.
99            //
100            // We also want to prevent any further iteration.
101            self.reached_end = true;
102            return Some(PickMetadata::new(Some(1.0), None));
103        }
104
105        // #2, first phase:
106        // Value quantile reached 1.0 while the iteration quantile is somewhere below 1.0 (it can be
107        // arbitrarily close to 0 for lopsided histograms). So, we continue with normal quantile
108        // tick logic for the first time, and pick up the #2 case in `more()` below.
109
110        // The choice to maintain fixed-sized "ticks" in each half-distance to 100% [starting from
111        // 0%], as opposed to a "tick" size that varies with each interval, was made to make the
112        // steps easily comprehensible and readable to humans. The resulting quantile steps are
113        // much easier to browse through in a quantile distribution output, for example.
114        //
115        // We calculate the number of equal-sized "ticks" that the 0-1 range will be divided by
116        // at the current scale. The scale is determined by the quantile level we are iterating
117        // to. The following math determines the tick size for the current scale, and maintain a
118        // fixed tick size for the remaining "half the distance to 100%" [from either 0% or from
119        // the previous half-distance]. When that half-distance is crossed, the scale changes and
120        // the tick size is effectively cut in half.
121        //
122        // Calculate the number of times we've halved the distance to 100%, This is 1 at 50%, 2 at
123        // 75%, 3 at 87.5%, etc. 2 ^ num_halvings is the number of slices that will fit into 100%.
124        // At 50%, num_halvings would be 1, so 2 ^ 1 would yield 2 slices, etc. At any given number
125        // of slices, the last slice is what we're going to traverse the first half of. With 1 total
126        // slice, traverse half to get to 50%. Then traverse half of the last (second) slice to get
127        // to 75%, etc.
128        // Minimum of 0 (1.0/1.0 = 1, log 2 of which is 0) so unsigned cast is safe.
129        // Won't hit the `inf` case because quantile < 1.0, so this should yield an actual number.
130        let num_halvings = (1.0 / (1.0 - self.quantile_to_iterate_to)).log2() as u32;
131        // Calculate the total number of ticks in 0-1 given that half of each slice is tick'd.
132        // The number of slices is 2 ^ num_halvings, and each slice has two "half distances" to
133        // tick, so we add an extra power of two to get ticks per whole distance.
134        // Use u64 math so that there's less risk of overflow with large numbers of ticks and data
135        // that ends up needing large numbers of halvings.
136        let total_ticks = u64::from(self.ticks_per_half_distance)
137            .checked_mul(
138                1_u64
139                    .checked_shl(num_halvings + 1)
140                    .expect("too many halvings"),
141            )
142            .expect("too many total ticks");
143        let increment_size = 1.0_f64 / total_ticks as f64;
144
145        let metadata = PickMetadata::new(Some(self.quantile_to_iterate_to), None);
146
147        let sum = self.quantile_to_iterate_to + increment_size;
148        self.quantile_to_iterate_to = if sum == self.quantile_to_iterate_to {
149            // the iteration has reached the point where the increment is too small to actually
150            // change an f64 slightly smaller than 1.0, so just short circuit to 1.0.
151            // This happens easily in case #4, and plausibly in #3: it will iterate up to 1.0
152            // without any skipping, which will
153            1.0
154        } else {
155            sum
156        };
157        Some(metadata)
158    }
159
160    fn more(&mut self, _: usize) -> bool {
161        // One of the end cases has already declared we're done.
162        if self.reached_end {
163            return false;
164        }
165
166        // #2, middle phase: already picked the max-value index once with iteration quantile < 1.0,
167        // and `more()` is now called (for the first time), so iterate one more time, but jump to
168        // quantile 1.0 while doing so. We don't set `reached_end` here because we do want 1 more
169        // iteration.
170        self.quantile_to_iterate_to = 1.0;
171        true
172    }
173}