hdrhistogram/iterators/quantile.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
use crate::core::counter::Counter;
use crate::iterators::{HistogramIterator, PickMetadata, PickyIterator};
use crate::Histogram;
/// An iterator that will yield at quantile steps through the histogram's value range.
pub struct Iter<'a, T: 'a + Counter> {
hist: &'a Histogram<T>,
ticks_per_half_distance: u32,
quantile_to_iterate_to: f64,
reached_end: bool,
}
impl<'a, T: 'a + Counter> Iter<'a, T> {
/// Construct a new iterator. See `Histogram::iter_quantiles` for details.
pub fn new(
hist: &'a Histogram<T>,
ticks_per_half_distance: u32,
) -> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(
ticks_per_half_distance > 0,
"Ticks per half distance must be > 0"
);
HistogramIterator::new(
hist,
Iter {
hist,
ticks_per_half_distance,
quantile_to_iterate_to: 0.0,
reached_end: false,
},
)
}
}
impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
#[allow(clippy::float_cmp)]
fn pick(&mut self, _: usize, running_total: u64, count_at_index: T) -> Option<PickMetadata> {
if count_at_index == T::zero() {
return None;
}
// This calculation, combined with the `quantile * count` in `value_at_quantile`, tends
// to produce a count_at_quantile that is 1 ulp wrong. That's just the way IEEE754 works.
let current_quantile = running_total as f64 / self.hist.len() as f64;
if current_quantile < self.quantile_to_iterate_to {
return None;
}
// Because there are effectively two quantiles in play (the quantile of the value for the
// bucket we're at aka "value quantile", and the quantile we're iterating to aka "iteration
// quantile", which may be significantly different, especially in highly non-uniform
// distributions), the behavior around 1.0 is a little tricky.
//
// The desired behavior is that we always iterate until the iteration quantile reaches 1.0,
// but if the value quantile reaches 1.0 (by reaching the last index with a non-zero count)
// before that point, we skip the remaining intermediate iterations in that same index and
// jump straight to iteration quantile 1.0.
// This is effectively a policy decision, but it is consistent with other iterators: they
// don't just stop when the quantile reaches 1.0 upon first entering the final bucket.
// At the same time, it's arguably unhelpful to have a bunch of all-but-identical quantile
// ticks, hence skipping the intermediate iterations. (This is also how the Java impl
// behaves.)
//
// Note that it is impossible to have the value quantile lower than the iteration quantile
// since the value quantile incorporates the count for the entire bucket when it's first
// entered, while the hypothetical fractional count that the iteration quantile would use is
// necessarily less than that.
//
// Cases for ending iteration:
// 1. Iteration quantile reaches 1.0 along with the value quantile reaching 1.0 at the max
// value index
// 2. Iteration quantile is below 1.0 when the value quantile reaches 1.0 at the max value
// index
// 3. Same as #1, but not at the max value index because total count has saturated. This
// means that more() will not be called.
// 4. Same as #2, but not at the max value index because total count has saturated. See #3.
if self.reached_end {
// #3, #4 part 2: Need to check here, not just in `more()`: when we see quantile 1.0 and
// set `reached_end`, `more()` will not be called (because we haven't reached the last
// non-zero-count index) so it can't stop iteration, and we must stop it here.
//
// This will be hit for all remaining non-zero-count indices, then control will proceed
// to `more()`.
return None;
}
// #1: If we reach iteration quantile 1.0 at the same time as value quantile 1.0 (because we
// moved to the final non-zero-count index exactly when the iteration ticked over to 1.0),
// we want to emit a value at that point, but not proceed past that.
// #2, last phase: This could also be the second visit to the max value index in the #2 case
// where `quantile_to_iterate_to` has been set to 1.0.
// #3, #4 last phase: Similar, but iteration proceeded normally up to 1.0 without any
// last-bucket skipping because it wasn't at the last bucket.
if self.quantile_to_iterate_to == 1.0 {
// We want to pick this value but not do the math below because it doesn't work when
// quantile >= 1.0.
//
// We also want to prevent any further iteration.
self.reached_end = true;
return Some(PickMetadata::new(Some(1.0), None));
}
// #2, first phase:
// Value quantile reached 1.0 while the iteration quantile is somewhere below 1.0 (it can be
// arbitrarily close to 0 for lopsided histograms). So, we continue with normal quantile
// tick logic for the first time, and pick up the #2 case in `more()` below.
// The choice to maintain fixed-sized "ticks" in each half-distance to 100% [starting from
// 0%], as opposed to a "tick" size that varies with each interval, was made to make the
// steps easily comprehensible and readable to humans. The resulting quantile steps are
// much easier to browse through in a quantile distribution output, for example.
//
// We calculate the number of equal-sized "ticks" that the 0-1 range will be divided by
// at the current scale. The scale is determined by the quantile level we are iterating
// to. The following math determines the tick size for the current scale, and maintain a
// fixed tick size for the remaining "half the distance to 100%" [from either 0% or from
// the previous half-distance]. When that half-distance is crossed, the scale changes and
// the tick size is effectively cut in half.
//
// Calculate the number of times we've halved the distance to 100%, This is 1 at 50%, 2 at
// 75%, 3 at 87.5%, etc. 2 ^ num_halvings is the number of slices that will fit into 100%.
// At 50%, num_halvings would be 1, so 2 ^ 1 would yield 2 slices, etc. At any given number
// of slices, the last slice is what we're going to traverse the first half of. With 1 total
// slice, traverse half to get to 50%. Then traverse half of the last (second) slice to get
// to 75%, etc.
// Minimum of 0 (1.0/1.0 = 1, log 2 of which is 0) so unsigned cast is safe.
// Won't hit the `inf` case because quantile < 1.0, so this should yield an actual number.
let num_halvings = (1.0 / (1.0 - self.quantile_to_iterate_to)).log2() as u32;
// Calculate the total number of ticks in 0-1 given that half of each slice is tick'd.
// The number of slices is 2 ^ num_halvings, and each slice has two "half distances" to
// tick, so we add an extra power of two to get ticks per whole distance.
// Use u64 math so that there's less risk of overflow with large numbers of ticks and data
// that ends up needing large numbers of halvings.
let total_ticks = u64::from(self.ticks_per_half_distance)
.checked_mul(
1_u64
.checked_shl(num_halvings + 1)
.expect("too many halvings"),
)
.expect("too many total ticks");
let increment_size = 1.0_f64 / total_ticks as f64;
let metadata = PickMetadata::new(Some(self.quantile_to_iterate_to), None);
let sum = self.quantile_to_iterate_to + increment_size;
self.quantile_to_iterate_to = if sum == self.quantile_to_iterate_to {
// the iteration has reached the point where the increment is too small to actually
// change an f64 slightly smaller than 1.0, so just short circuit to 1.0.
// This happens easily in case #4, and plausibly in #3: it will iterate up to 1.0
// without any skipping, which will
1.0
} else {
sum
};
Some(metadata)
}
fn more(&mut self, _: usize) -> bool {
// One of the end cases has already declared we're done.
if self.reached_end {
return false;
}
// #2, middle phase: already picked the max-value index once with iteration quantile < 1.0,
// and `more()` is now called (for the first time), so iterate one more time, but jump to
// quantile 1.0 while doing so. We don't set `reached_end` here because we do want 1 more
// iteration.
self.quantile_to_iterate_to = 1.0;
true
}
}