bench_acquire.rs - mozsearch

Enable keyboard shortcuts

//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.

//! We do some other operations that write to memory to get an imprecise but somewhat realistic

//! measurement.

use once_cell::sync::OnceCell;

use std::sync::atomic::{AtomicUsize, Ordering};

const N_THREADS: usize = 16;

const N_ROUNDS: usize = 1_000_000;

static CELL: OnceCell<usize> = OnceCell::new();

static OTHER: AtomicUsize = AtomicUsize::new(0);

fn main() {

    let start = std::time::Instant::now();

    let threads =

        (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>();

    for thread in threads {

        thread.join().unwrap();

    println!("{:?}", start.elapsed());

    println!("{:?}", OTHER.load(Ordering::Relaxed));

#[inline(never)]

fn thread_main(i: usize) {

    // The operations we do here don't really matter, as long as we do multiple writes, and

    // everything is messy enough to prevent the compiler from optimizing the loop away.

    let mut data = [i; 128];

    let mut accum = 0usize;

    for _ in 0..N_ROUNDS {

        let _value = CELL.get_or_init(|| i + 1);

        let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed);

        for j in data.iter_mut() {

            *j = (*j).wrapping_add(accum);

            accum = accum.wrapping_add(k);