test/
bench.rs

1//! Benchmarking module.
2
3use std::panic::{AssertUnwindSafe, catch_unwind};
4use std::sync::{Arc, Mutex};
5use std::time::{Duration, Instant};
6use std::{cmp, io};
7
8use super::Sender;
9use super::event::CompletedTest;
10use super::options::BenchMode;
11use super::test_result::TestResult;
12use super::types::{TestDesc, TestId};
13use crate::stats;
14
/// Returns `dummy` unchanged while *hinting* to the compiler that it should assume the worst
/// about what happens to the value, so the computation producing it is not optimized away.
///
/// This simply forwards to [`std::hint::black_box`]; see its documentation for details.
#[inline(always)]
pub fn black_box<T>(dummy: T) -> T {
    use std::hint;

    hint::black_box(dummy)
}
23
24/// Manager of the benchmarking runs.
25///
26/// This is fed into functions marked with `#[bench]` to allow for
27/// set-up & tear-down before running a piece of code repeatedly via a
28/// call to `iter`.
29#[derive(Clone)]
30pub struct Bencher {
31    mode: BenchMode,
32    summary: Option<stats::Summary>,
33    pub bytes: u64,
34}
35
36impl Bencher {
37    /// Callback for benchmark functions to run in their body.
38    pub fn iter<T, F>(&mut self, mut inner: F)
39    where
40        F: FnMut() -> T,
41    {
42        if self.mode == BenchMode::Single {
43            ns_iter_inner(&mut inner, 1);
44            return;
45        }
46
47        self.summary = Some(iter(&mut inner));
48    }
49
50    pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String>
51    where
52        F: FnMut(&mut Bencher) -> Result<(), String>,
53    {
54        let result = f(self);
55        result.map(|_| self.summary)
56    }
57}
58
59#[derive(Debug, Clone, PartialEq)]
60pub struct BenchSamples {
61    pub ns_iter_summ: stats::Summary,
62    pub mb_s: usize,
63}
64
65pub fn fmt_bench_samples(bs: &BenchSamples) -> String {
66    use std::fmt::Write;
67    let mut output = String::new();
68
69    let median = bs.ns_iter_summ.median;
70    let deviation = bs.ns_iter_summ.max - bs.ns_iter_summ.min;
71
72    write!(
73        output,
74        "{:>14} ns/iter (+/- {})",
75        fmt_thousands_sep(median, ','),
76        fmt_thousands_sep(deviation, ',')
77    )
78    .unwrap();
79    if bs.mb_s != 0 {
80        write!(output, " = {} MB/s", bs.mb_s).unwrap();
81    }
82    output
83}
84
// Format a number with two decimal places and thousands separators,
// e.g. `1234567.891` with `sep == ','` becomes `1,234,567.89`.
//
// The value is rounded exactly once, *before* the digits are grouped, so a
// carry out of the fractional part propagates into the integer groups
// (`1999.999` -> `2,000.00`). Grouping each chunk separately and rounding
// only the last one would instead yield a four-digit final group.
//
// Every group of three integer digits is separated, however large the value.
// Values whose integer part is not made of plain digits (negative numbers,
// NaN, infinities) are returned as formatted by `{:.2}`, without separators.
fn fmt_thousands_sep(n: f64, sep: char) -> String {
    // modern CPUs can execute multiple instructions per nanosecond
    // e.g. benching an ADD takes about 0.25ns, so keep two decimals.
    let rendered = format!("{:.2}", n);

    // Non-finite values render without a decimal point ("NaN", "inf").
    let dot = match rendered.find('.') {
        Some(pos) => pos,
        None => return rendered,
    };

    // `frac` keeps its leading '.', so it can be appended verbatim.
    let (whole, frac) = rendered.split_at(dot);
    if !whole.bytes().all(|b| b.is_ascii_digit()) {
        return rendered;
    }

    let mut output = String::with_capacity(rendered.len() + whole.len() / 3);
    for (idx, digit) in whole.chars().enumerate() {
        // A separator goes in front of every digit that starts a complete
        // group of three counted from the right, except the very first digit.
        if idx != 0 && (whole.len() - idx) % 3 == 0 {
            output.push(sep);
        }
        output.push(digit);
    }
    output.push_str(frac);

    output
}
111
/// Calls `inner` exactly `k` times and returns the elapsed wall-clock time
/// for the whole batch, in nanoseconds.
///
/// Every return value of `inner` is passed through `black_box` so the
/// compiler is discouraged from optimizing the measured work away.
fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
where
    F: FnMut() -> T,
{
    let timer = Instant::now();

    let mut remaining = k;
    while remaining != 0 {
        black_box(inner());
        remaining -= 1;
    }

    timer.elapsed().as_nanos() as u64
}
122
123pub fn iter<T, F>(inner: &mut F) -> stats::Summary
124where
125    F: FnMut() -> T,
126{
127    // Initial bench run to get ballpark figure.
128    let ns_single = ns_iter_inner(inner, 1);
129
130    // Try to estimate iter count for 1ms falling back to 1m
131    // iterations if first run took < 1ns.
132    let ns_target_total = 1_000_000; // 1ms
133    let mut n = ns_target_total / cmp::max(1, ns_single);
134
135    // if the first run took more than 1ms we don't want to just
136    // be left doing 0 iterations on every loop. The unfortunate
137    // side effect of not being able to do as many runs is
138    // automatically handled by the statistical analysis below
139    // (i.e., larger error bars).
140    n = cmp::max(1, n);
141
142    let mut total_run = Duration::new(0, 0);
143    let samples: &mut [f64] = &mut [0.0_f64; 50];
144    loop {
145        let loop_start = Instant::now();
146
147        for p in &mut *samples {
148            *p = ns_iter_inner(inner, n) as f64 / n as f64;
149        }
150
151        stats::winsorize(samples, 5.0);
152        let summ = stats::Summary::new(samples);
153
154        for p in &mut *samples {
155            let ns = ns_iter_inner(inner, 5 * n);
156            *p = ns as f64 / (5 * n) as f64;
157        }
158
159        stats::winsorize(samples, 5.0);
160        let summ5 = stats::Summary::new(samples);
161
162        let loop_run = loop_start.elapsed();
163
164        // If we've run for 100ms and seem to have converged to a
165        // stable median.
166        if loop_run > Duration::from_millis(100)
167            && summ.median_abs_dev_pct < 1.0
168            && summ.median - summ5.median < summ5.median_abs_dev
169        {
170            return summ5;
171        }
172
173        total_run += loop_run;
174        // Longest we ever run for is 3s.
175        if total_run > Duration::from_secs(3) {
176            return summ5;
177        }
178
179        // If we overflow here just return the results so far. We check a
180        // multiplier of 10 because we're about to multiply by 2 and the
181        // next iteration of the loop will also multiply by 5 (to calculate
182        // the summ5 result)
183        n = match n.checked_mul(10) {
184            Some(_) => n * 2,
185            None => {
186                return summ5;
187            }
188        };
189    }
190}
191
192pub fn benchmark<F>(
193    id: TestId,
194    desc: TestDesc,
195    monitor_ch: Sender<CompletedTest>,
196    nocapture: bool,
197    f: F,
198) where
199    F: FnMut(&mut Bencher) -> Result<(), String>,
200{
201    let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };
202
203    let data = Arc::new(Mutex::new(Vec::new()));
204
205    if !nocapture {
206        io::set_output_capture(Some(data.clone()));
207    }
208
209    let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f)));
210
211    io::set_output_capture(None);
212
213    let test_result = match result {
214        //bs.bench(f) {
215        Ok(Ok(Some(ns_iter_summ))) => {
216            let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
217            let mb_s = bs.bytes * 1000 / ns_iter;
218
219            let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
220            TestResult::TrBench(bs)
221        }
222        Ok(Ok(None)) => {
223            // iter not called, so no data.
224            // FIXME: error in this case?
225            let samples: &mut [f64] = &mut [0.0_f64; 1];
226            let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
227            TestResult::TrBench(bs)
228        }
229        Err(_) => TestResult::TrFailed,
230        Ok(Err(_)) => TestResult::TrFailed,
231    };
232
233    let stdout = data.lock().unwrap().to_vec();
234    let message = CompletedTest::new(id, desc, test_result, None, stdout);
235    monitor_ch.send(message).unwrap();
236}
237
238pub fn run_once<F>(f: F) -> Result<(), String>
239where
240    F: FnMut(&mut Bencher) -> Result<(), String>,
241{
242    let mut bs = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 };
243    bs.bench(f).map(|_| ())
244}