hdrhistogram/iterators/quantile.rs
1use crate::core::counter::Counter;
2use crate::iterators::{HistogramIterator, PickMetadata, PickyIterator};
3use crate::Histogram;
4
5/// An iterator that will yield at quantile steps through the histogram's value range.
6pub struct Iter<'a, T: 'a + Counter> {
7 hist: &'a Histogram<T>,
8 ticks_per_half_distance: u32,
9 quantile_to_iterate_to: f64,
10 reached_end: bool,
11}
12
13impl<'a, T: 'a + Counter> Iter<'a, T> {
14 /// Construct a new iterator. See `Histogram::iter_quantiles` for details.
15 pub fn new(
16 hist: &'a Histogram<T>,
17 ticks_per_half_distance: u32,
18 ) -> HistogramIterator<'a, T, Iter<'a, T>> {
19 assert!(
20 ticks_per_half_distance > 0,
21 "Ticks per half distance must be > 0"
22 );
23
24 HistogramIterator::new(
25 hist,
26 Iter {
27 hist,
28 ticks_per_half_distance,
29 quantile_to_iterate_to: 0.0,
30 reached_end: false,
31 },
32 )
33 }
34}
35
36impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
37 #[allow(clippy::float_cmp)]
38 fn pick(&mut self, _: usize, running_total: u64, count_at_index: T) -> Option<PickMetadata> {
39 if count_at_index == T::zero() {
40 return None;
41 }
42
43 // This calculation, combined with the `quantile * count` in `value_at_quantile`, tends
44 // to produce a count_at_quantile that is 1 ulp wrong. That's just the way IEEE754 works.
45 let current_quantile = running_total as f64 / self.hist.len() as f64;
46 if current_quantile < self.quantile_to_iterate_to {
47 return None;
48 }
49
50 // Because there are effectively two quantiles in play (the quantile of the value for the
51 // bucket we're at aka "value quantile", and the quantile we're iterating to aka "iteration
52 // quantile", which may be significantly different, especially in highly non-uniform
53 // distributions), the behavior around 1.0 is a little tricky.
54 //
55 // The desired behavior is that we always iterate until the iteration quantile reaches 1.0,
56 // but if the value quantile reaches 1.0 (by reaching the last index with a non-zero count)
57 // before that point, we skip the remaining intermediate iterations in that same index and
58 // jump straight to iteration quantile 1.0.
59 // This is effectively a policy decision, but it is consistent with other iterators: they
60 // don't just stop when the quantile reaches 1.0 upon first entering the final bucket.
61 // At the same time, it's arguably unhelpful to have a bunch of all-but-identical quantile
62 // ticks, hence skipping the intermediate iterations. (This is also how the Java impl
63 // behaves.)
64 //
65 // Note that it is impossible to have the value quantile lower than the iteration quantile
66 // since the value quantile incorporates the count for the entire bucket when it's first
67 // entered, while the hypothetical fractional count that the iteration quantile would use is
68 // necessarily less than that.
69 //
70 // Cases for ending iteration:
71 // 1. Iteration quantile reaches 1.0 along with the value quantile reaching 1.0 at the max
72 // value index
73 // 2. Iteration quantile is below 1.0 when the value quantile reaches 1.0 at the max value
74 // index
75 // 3. Same as #1, but not at the max value index because total count has saturated. This
76 // means that more() will not be called.
77 // 4. Same as #2, but not at the max value index because total count has saturated. See #3.
78
79 if self.reached_end {
80 // #3, #4 part 2: Need to check here, not just in `more()`: when we see quantile 1.0 and
81 // set `reached_end`, `more()` will not be called (because we haven't reached the last
82 // non-zero-count index) so it can't stop iteration, and we must stop it here.
83 //
84 // This will be hit for all remaining non-zero-count indices, then control will proceed
85 // to `more()`.
86 return None;
87 }
88
89 // #1: If we reach iteration quantile 1.0 at the same time as value quantile 1.0 (because we
90 // moved to the final non-zero-count index exactly when the iteration ticked over to 1.0),
91 // we want to emit a value at that point, but not proceed past that.
92 // #2, last phase: This could also be the second visit to the max value index in the #2 case
93 // where `quantile_to_iterate_to` has been set to 1.0.
94 // #3, #4 last phase: Similar, but iteration proceeded normally up to 1.0 without any
95 // last-bucket skipping because it wasn't at the last bucket.
96 if self.quantile_to_iterate_to == 1.0 {
97 // We want to pick this value but not do the math below because it doesn't work when
98 // quantile >= 1.0.
99 //
100 // We also want to prevent any further iteration.
101 self.reached_end = true;
102 return Some(PickMetadata::new(Some(1.0), None));
103 }
104
105 // #2, first phase:
106 // Value quantile reached 1.0 while the iteration quantile is somewhere below 1.0 (it can be
107 // arbitrarily close to 0 for lopsided histograms). So, we continue with normal quantile
108 // tick logic for the first time, and pick up the #2 case in `more()` below.
109
110 // The choice to maintain fixed-sized "ticks" in each half-distance to 100% [starting from
111 // 0%], as opposed to a "tick" size that varies with each interval, was made to make the
112 // steps easily comprehensible and readable to humans. The resulting quantile steps are
113 // much easier to browse through in a quantile distribution output, for example.
114 //
115 // We calculate the number of equal-sized "ticks" that the 0-1 range will be divided by
116 // at the current scale. The scale is determined by the quantile level we are iterating
117 // to. The following math determines the tick size for the current scale, and maintain a
118 // fixed tick size for the remaining "half the distance to 100%" [from either 0% or from
119 // the previous half-distance]. When that half-distance is crossed, the scale changes and
120 // the tick size is effectively cut in half.
121 //
122 // Calculate the number of times we've halved the distance to 100%, This is 1 at 50%, 2 at
123 // 75%, 3 at 87.5%, etc. 2 ^ num_halvings is the number of slices that will fit into 100%.
124 // At 50%, num_halvings would be 1, so 2 ^ 1 would yield 2 slices, etc. At any given number
125 // of slices, the last slice is what we're going to traverse the first half of. With 1 total
126 // slice, traverse half to get to 50%. Then traverse half of the last (second) slice to get
127 // to 75%, etc.
128 // Minimum of 0 (1.0/1.0 = 1, log 2 of which is 0) so unsigned cast is safe.
129 // Won't hit the `inf` case because quantile < 1.0, so this should yield an actual number.
130 let num_halvings = (1.0 / (1.0 - self.quantile_to_iterate_to)).log2() as u32;
131 // Calculate the total number of ticks in 0-1 given that half of each slice is tick'd.
132 // The number of slices is 2 ^ num_halvings, and each slice has two "half distances" to
133 // tick, so we add an extra power of two to get ticks per whole distance.
134 // Use u64 math so that there's less risk of overflow with large numbers of ticks and data
135 // that ends up needing large numbers of halvings.
136 let total_ticks = u64::from(self.ticks_per_half_distance)
137 .checked_mul(
138 1_u64
139 .checked_shl(num_halvings + 1)
140 .expect("too many halvings"),
141 )
142 .expect("too many total ticks");
143 let increment_size = 1.0_f64 / total_ticks as f64;
144
145 let metadata = PickMetadata::new(Some(self.quantile_to_iterate_to), None);
146
147 let sum = self.quantile_to_iterate_to + increment_size;
148 self.quantile_to_iterate_to = if sum == self.quantile_to_iterate_to {
149 // the iteration has reached the point where the increment is too small to actually
150 // change an f64 slightly smaller than 1.0, so just short circuit to 1.0.
151 // This happens easily in case #4, and plausibly in #3: it will iterate up to 1.0
152 // without any skipping, which will
153 1.0
154 } else {
155 sum
156 };
157 Some(metadata)
158 }
159
160 fn more(&mut self, _: usize) -> bool {
161 // One of the end cases has already declared we're done.
162 if self.reached_end {
163 return false;
164 }
165
166 // #2, middle phase: already picked the max-value index once with iteration quantile < 1.0,
167 // and `more()` is now called (for the first time), so iterate one more time, but jump to
168 // quantile 1.0 while doing so. We don't set `reached_end` here because we do want 1 more
169 // iteration.
170 self.quantile_to_iterate_to = 1.0;
171 true
172 }
173}