Revision control
Copy as Markdown
Other Tools
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
use std::collections::HashMap;
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use super::{Bucketing, Histogram};
use crate::util::floating_point_context::FloatingPointContext;
/// Create the possible ranges in an exponential distribution from `min` to `max` with
/// `bucket_count` buckets.
///
/// This algorithm calculates the bucket sizes using a natural log approach to get `bucket_count` number of buckets,
/// exponentially spaced between `min` and `max`
///
/// Bucket limits are the minimal bucket value.
/// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`.
/// It will always contain an underflow bucket (`< 1`).
fn exponential_range(min: u64, max: u64, bucket_count: usize) -> Vec<u64> {
// Set the FPU control flag to the required state within this function
let _fpc = FloatingPointContext::new();
let log_max = (max as f64).ln();
let mut ranges = Vec::with_capacity(bucket_count);
let mut current = min;
if current == 0 {
current = 1;
}
// undeflow bucket
ranges.push(0);
ranges.push(current);
for i in 2..bucket_count {
let log_current = (current as f64).ln();
let log_ratio = (log_max - log_current) / (bucket_count - i) as f64;
let log_next = log_current + log_ratio;
let next_value = log_next.exp().round() as u64;
current = if next_value > current {
next_value
} else {
current + 1
};
ranges.push(current);
}
ranges
}
/// An exponential bucketing algorithm.
///
/// Buckets are pre-computed at instantiation with an exponential distribution from `min` to `max`
/// and `bucket_count` buckets.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PrecomputedExponential {
// Don't serialize the (potentially large) array of ranges, instead compute them on first
// access.
#[serde(skip)]
pub(crate) bucket_ranges: OnceCell<Vec<u64>>,
pub(crate) min: u64,
pub(crate) max: u64,
pub(crate) bucket_count: usize,
}
impl Bucketing for PrecomputedExponential {
/// Get the bucket for the sample.
///
/// This uses a binary search to locate the index `i` of the bucket such that:
/// bucket[i] <= sample < bucket[i+1]
fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
let limit = match self.ranges().binary_search(&sample) {
// Found an exact match to fit it in
Ok(i) => i,
// Sorted it fits after the bucket's limit, therefore it fits into the previous bucket
Err(i) => i - 1,
};
self.ranges()[limit]
}
fn ranges(&self) -> &[u64] {
// Create the exponential range on first access.
self.bucket_ranges
.get_or_init(|| exponential_range(self.min, self.max, self.bucket_count))
}
}
impl Histogram<PrecomputedExponential> {
/// Creates a histogram with `count` exponential buckets in the range `min` to `max`.
pub fn exponential(
min: u64,
max: u64,
bucket_count: usize,
) -> Histogram<PrecomputedExponential> {
Histogram {
values: HashMap::new(),
count: 0,
sum: 0,
bucketing: PrecomputedExponential {
bucket_ranges: OnceCell::new(),
min,
max,
bucket_count,
},
}
}
}
#[cfg(test)]
mod test {
use super::*;
const DEFAULT_BUCKET_COUNT: usize = 100;
const DEFAULT_RANGE_MIN: u64 = 0;
const DEFAULT_RANGE_MAX: u64 = 60_000;
#[test]
fn can_count() {
let mut hist = Histogram::exponential(1, 500, 10);
assert!(hist.is_empty());
for i in 1..=10 {
hist.accumulate(i);
}
assert_eq!(10, hist.count());
assert_eq!(55, hist.sum());
}
#[test]
fn overflow_values_accumulate_in_the_last_bucket() {
let mut hist =
Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT);
hist.accumulate(DEFAULT_RANGE_MAX + 100);
assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]);
}
#[test]
fn short_exponential_buckets_are_correct() {
let test_buckets = vec![0, 1, 2, 3, 5, 9, 16, 29, 54, 100];
assert_eq!(test_buckets, exponential_range(1, 100, 10));
// There's always a zero bucket, so we increase the lower limit.
assert_eq!(test_buckets, exponential_range(0, 100, 10));
}
#[test]
fn default_exponential_buckets_are_correct() {
// Hand calculated values using current default range 0 - 60000 and bucket count of 100.
// NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any
// values beyond the maximum (in this case the maximum is 60000)
let test_buckets = vec![
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 23, 25, 28, 31, 34,
38, 42, 46, 51, 56, 62, 68, 75, 83, 92, 101, 111, 122, 135, 149, 164, 181, 200, 221,
244, 269, 297, 328, 362, 399, 440, 485, 535, 590, 651, 718, 792, 874, 964, 1064, 1174,
1295, 1429, 1577, 1740, 1920, 2118, 2337, 2579, 2846, 3140, 3464, 3822, 4217, 4653,
5134, 5665, 6250, 6896, 7609, 8395, 9262, 10219, 11275, 12440, 13726, 15144, 16709,
18436, 20341, 22443, 24762, 27321, 30144, 33259, 36696, 40488, 44672, 49288, 54381,
60000,
];
assert_eq!(
test_buckets,
exponential_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT)
);
}
#[test]
fn default_buckets_correctly_accumulate() {
let mut hist =
Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT);
for i in &[1, 10, 100, 1000, 10000] {
hist.accumulate(*i);
}
assert_eq!(11111, hist.sum());
assert_eq!(5, hist.count());
assert_eq!(None, hist.values.get(&0)); // underflow is empty
assert_eq!(1, hist.values[&1]); // bucket_ranges[1] = 1
assert_eq!(1, hist.values[&10]); // bucket_ranges[10] = 10
assert_eq!(1, hist.values[&92]); // bucket_ranges[33] = 92
assert_eq!(1, hist.values[&964]); // bucket_ranges[57] = 964
assert_eq!(1, hist.values[&9262]); // bucket_ranges[80] = 9262
}
#[test]
fn accumulate_large_numbers() {
let mut hist = Histogram::exponential(1, 500, 10);
hist.accumulate(u64::MAX);
hist.accumulate(u64::MAX);
assert_eq!(2, hist.count());
// Saturate before overflowing
assert_eq!(u64::MAX, hist.sum());
assert_eq!(2, hist.values[&500]);
}
}