Revision control
Copy as Markdown
Other Tools
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
//! A simple histogram implementation for exponential histograms.
use std::any::TypeId;
use std::collections::HashMap;
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use crate::error::{Error, ErrorKind};
pub use exponential::PrecomputedExponential;
pub use functional::Functional;
pub use linear::PrecomputedLinear;
mod exponential;
mod functional;
mod linear;
/// Different kinds of histograms.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HistogramType {
/// A histogram with linear distributed buckets.
Linear,
/// A histogram with exponential distributed buckets.
Exponential,
}
impl TryFrom<i32> for HistogramType {
type Error = Error;
fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
match value {
0 => Ok(HistogramType::Linear),
1 => Ok(HistogramType::Exponential),
e => Err(ErrorKind::HistogramType(e).into()),
}
}
}
/// A histogram.
///
/// Stores the counts per bucket and tracks the count of added samples and the total sum.
/// The bucketing algorithm can be changed.
///
/// ## Example
///
/// ```rust,ignore
/// let mut hist = Histogram::exponential(1, 500, 10);
///
/// for i in 1..=10 {
/// hist.accumulate(i);
/// }
///
/// assert_eq!(10, hist.count());
/// assert_eq!(55, hist.sum());
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Histogram<B> {
/// Mapping bucket's minimum to sample count.
values: HashMap<u64, u64>,
/// The count of samples added.
count: u64,
/// The total sum of samples.
sum: u64,
/// The bucketing algorithm used.
bucketing: B,
}
/// A bucketing algorithm for histograms.
///
/// It's responsible to calculate the bucket a sample goes into.
/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed.
pub trait Bucketing {
/// Get the bucket's minimum value the sample falls into.
fn sample_to_bucket_minimum(&self, sample: u64) -> u64;
/// The computed bucket ranges for this bucketing algorithm.
fn ranges(&self) -> &[u64];
}
impl<B: Bucketing> Histogram<B> {
/// Gets the number of buckets in this histogram.
pub fn bucket_count(&self) -> usize {
self.values.len()
}
/// Adds a single value to this histogram.
pub fn accumulate(&mut self, sample: u64) {
let bucket_min = self.bucketing.sample_to_bucket_minimum(sample);
let entry = self.values.entry(bucket_min).or_insert(0);
*entry += 1;
self.sum = self.sum.saturating_add(sample);
self.count += 1;
}
/// Gets the total sum of values recorded in this histogram.
pub fn sum(&self) -> u64 {
self.sum
}
/// Gets the total count of values recorded in this histogram.
pub fn count(&self) -> u64 {
self.count
}
/// Gets the filled values.
pub fn values(&self) -> &HashMap<u64, u64> {
&self.values
}
/// Checks if this histogram recorded any values.
pub fn is_empty(&self) -> bool {
self.count() == 0
}
/// Gets a snapshot of all values from the first bucket until one past the last filled bucket,
/// filling in empty buckets with 0.
pub fn snapshot_values(&self) -> HashMap<u64, u64> {
let mut res = self.values.clone();
let max_bucket = self.values.keys().max().cloned().unwrap_or(0);
for &min_bucket in self.bucketing.ranges() {
// Fill in missing entries.
let _ = res.entry(min_bucket).or_insert(0);
// stop one after the last filled bucket
if min_bucket > max_bucket {
break;
}
}
res
}
/// Clear this histogram.
pub fn clear(&mut self) {
self.sum = 0;
self.count = 0;
self.values.clear();
}
}
/// Either linear or exponential histogram bucketing
///
/// This is to be used as a single type to avoid generic use in the buffered API.
pub enum LinearOrExponential {
Linear(PrecomputedLinear),
Exponential(PrecomputedExponential),
}
impl Histogram<LinearOrExponential> {
/// A histogram using linear bucketing.
///
/// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
/// This type is only used for the buffered API.
pub fn _linear(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
Histogram {
values: HashMap::new(),
count: 0,
sum: 0,
bucketing: LinearOrExponential::Linear(PrecomputedLinear {
bucket_ranges: OnceCell::new(),
min,
max,
bucket_count,
}),
}
}
/// A histogram using expontential bucketing.
///
/// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
/// This type is only used for the buffered API.
pub fn _exponential(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
Histogram {
values: HashMap::new(),
count: 0,
sum: 0,
bucketing: LinearOrExponential::Exponential(PrecomputedExponential {
bucket_ranges: OnceCell::new(),
min,
max,
bucket_count,
}),
}
}
}
impl Bucketing for LinearOrExponential {
fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
use LinearOrExponential::*;
match self {
Linear(lin) => lin.sample_to_bucket_minimum(sample),
Exponential(exp) => exp.sample_to_bucket_minimum(sample),
}
}
fn ranges(&self) -> &[u64] {
use LinearOrExponential::*;
match self {
Linear(lin) => lin.ranges(),
Exponential(exp) => exp.ranges(),
}
}
}
impl<B> Histogram<B>
where
B: Bucketing,
B: std::fmt::Debug,
B: PartialEq,
{
/// Merges data from one histogram into the other.
///
/// ## Panics
///
/// Panics if the two histograms don't use the same bucketing.
pub fn merge(&mut self, other: &Self) {
assert_eq!(self.bucketing, other.bucketing);
self.sum += other.sum;
self.count += other.count;
for (&bucket, &count) in &other.values {
*self.values.entry(bucket).or_insert(0) += count;
}
}
}
impl<B> Histogram<B>
where
B: Bucketing + 'static,
B: std::fmt::Debug,
B: PartialEq,
{
/// Merges data from one histogram into the other.
///
/// ## Panics
///
/// Panics if the two histograms don't use the same bucketing.
/// Note that the `other` side can be either linear or exponential
/// and we only merge if it matches `self`'s bucketing.
// _Note:_ Unfortunately this needs a separate name from the above, otherwise it's a conflicting
// method.
// We only use it internally for the buffered API, and can guarantee correct usage that way.
pub fn _merge(&mut self, other: &Histogram<LinearOrExponential>) {
#[rustfmt::skip]
assert!(
(
TypeId::of::<B>() == TypeId::of::<PrecomputedLinear>()
&& matches!(other.bucketing, LinearOrExponential::Linear(_))
) ||
(
TypeId::of::<B>() == TypeId::of::<PrecomputedExponential>()
&& matches!(other.bucketing, LinearOrExponential::Exponential(_))
)
);
self.sum += other.sum;
self.count += other.count;
for (&bucket, &count) in &other.values {
*self.values.entry(bucket).or_insert(0) += count;
}
}
}