Skip to content

Commit 142a732

Browse files
committed
Make perf. counter stats optional in BenchmarkStats
This makes it possible to run runtime benchmarks even in environments that do not fully support perf. event profiling (such as CI).
1 parent d386f1e commit 142a732

File tree

3 files changed

+59
-39
lines changed

3 files changed

+59
-39
lines changed

collector/benchlib/src/comm/messages.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ pub struct BenchmarkResult {
1616
}
1717

1818
/// The stats gathered by a single benchmark execution.
19+
/// Some of the perf. counters may be missing if the machine that executes the benchmark is unable
20+
/// to gather them.
1921
#[derive(Debug, serde::Serialize, serde::Deserialize)]
2022
pub struct BenchmarkStats {
21-
pub cycles: u64,
22-
pub instructions: u64,
23-
pub branch_misses: u64,
24-
pub cache_misses: u64,
25-
pub cache_references: u64,
23+
pub cycles: Option<u64>,
24+
pub instructions: Option<u64>,
25+
pub branch_misses: Option<u64>,
26+
pub cache_misses: Option<u64>,
27+
pub cache_references: Option<u64>,
2628
pub wall_time: Duration,
2729
}

collector/benchlib/src/measure/perf_counter/unix.rs

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@ use perf_event::events::Hardware;
44
use perf_event::{Builder, Counter, Group};
55
use std::time::Instant;
66

7+
/// A collection of CPU performance counters.
8+
/// The counters are optional, because some CPUs are not able to record them.
79
struct Counters {
8-
cycles: Counter,
9-
instructions: Counter,
10-
branch_misses: Counter,
11-
cache_misses: Counter,
12-
cache_references: Counter,
10+
cycles: Option<Counter>,
11+
instructions: Option<Counter>,
12+
branch_misses: Option<Counter>,
13+
cache_misses: Option<Counter>,
14+
cache_references: Option<Counter>,
1315
}
1416

1517
/// Benchmarks a single function generated by `benchmark_constructor`.
@@ -48,11 +50,11 @@ pub fn benchmark_function<F: Fn() -> Bench + 'static, R, Bench: FnOnce() -> R +
4850
black_box(output);
4951

5052
let result = BenchmarkStats {
51-
cycles: measurement[&counters.cycles],
52-
instructions: measurement[&counters.instructions],
53-
branch_misses: measurement[&counters.branch_misses],
54-
cache_misses: measurement[&counters.cache_misses],
55-
cache_references: measurement[&counters.cache_references],
53+
cycles: counters.cycles.map(|c| measurement[&c]),
54+
instructions: counters.instructions.map(|c| measurement[&c]),
55+
branch_misses: counters.branch_misses.map(|c| measurement[&c]),
56+
cache_misses: counters.cache_misses.map(|c| measurement[&c]),
57+
cache_references: counters.cache_references.map(|c| measurement[&c]),
5658
wall_time: duration,
5759
};
5860
Ok(result)
@@ -77,19 +79,23 @@ Try lowering it with `sudo bash -c 'echo -1 > /proc/sys/kernel/perf_event_parano
7779
}
7880

7981
fn prepare_counters(group: &mut Group) -> anyhow::Result<Counters> {
80-
let mut add_event = |event: Hardware| {
81-
Builder::new()
82-
.group(group)
83-
.kind(event)
84-
.build()
85-
.map_err(|error| anyhow::anyhow!("Could not add counter {:?}: {:?}", event, error))
82+
let mut add_event = |event: Hardware| match Builder::new().group(group).kind(event).build() {
83+
Ok(counter) => Some(counter),
84+
Err(error) => {
85+
log::warn!(
86+
"Could not add counter {:?}: {:?}. Maybe the CPU doesn't support it?",
87+
event,
88+
error
89+
);
90+
None
91+
}
8692
};
8793

88-
let cycles = add_event(Hardware::CPU_CYCLES)?;
89-
let instructions = add_event(Hardware::INSTRUCTIONS)?;
90-
let branch_misses = add_event(Hardware::BRANCH_MISSES)?;
91-
let cache_misses = add_event(Hardware::CACHE_MISSES)?;
92-
let cache_references = add_event(Hardware::CACHE_REFERENCES)?;
94+
let cycles = add_event(Hardware::CPU_CYCLES);
95+
let instructions = add_event(Hardware::INSTRUCTIONS);
96+
let branch_misses = add_event(Hardware::BRANCH_MISSES);
97+
let cache_misses = add_event(Hardware::CACHE_MISSES);
98+
let cache_references = add_event(Hardware::CACHE_REFERENCES);
9399

94100
Ok(Counters {
95101
cycles,

collector/src/runtime/mod.rs

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -125,24 +125,36 @@ fn calculate_mean<I: Iterator<Item = f64> + Clone>(iter: I) -> f64 {
125125
}
126126

127127
fn print_stats(result: &BenchmarkResult) {
128-
fn print_metric<F: Fn(&BenchmarkStats) -> u64>(result: &BenchmarkResult, name: &str, f: F) {
129-
let mean = calculate_mean(result.stats.iter().map(&f).map(|v| v as f64));
130-
let stddev = calculate_mean(
131-
result
132-
.stats
133-
.iter()
134-
.map(&f)
135-
.map(|v| (v as f64 - mean).powf(2.0)),
136-
)
137-
.sqrt();
138-
128+
fn print_metric<F: Fn(&BenchmarkStats) -> Option<u64>>(
129+
result: &BenchmarkResult,
130+
name: &str,
131+
f: F,
132+
) {
139133
let name = format!("[{name}]");
140-
println!("{name:>20}: {:>16} (+/- {:>8})", mean as u64, stddev as u64);
134+
let has_data = result.stats.iter().map(&f).all(|v| v.is_some());
135+
if has_data {
136+
let f = |stats: &BenchmarkStats| -> u64 { f(stats).unwrap() };
137+
let mean = calculate_mean(result.stats.iter().map(&f).map(|v| v as f64));
138+
let stddev = calculate_mean(
139+
result
140+
.stats
141+
.iter()
142+
.map(&f)
143+
.map(|v| (v as f64 - mean).powf(2.0)),
144+
)
145+
.sqrt();
146+
147+
println!("{name:>20}: {:>16} (+/- {:>8})", mean as u64, stddev as u64);
148+
} else {
149+
println!("{name:>20}: Not available");
150+
}
141151
}
142152

143153
print_metric(result, "Instructions", |m| m.instructions);
144154
print_metric(result, "Cycles", |m| m.cycles);
145-
print_metric(result, "Wall time [us]", |m| m.wall_time.as_micros() as u64);
155+
print_metric(result, "Wall time [us]", |m| {
156+
Some(m.wall_time.as_micros() as u64)
157+
});
146158
print_metric(result, "Branch misses", |m| m.branch_misses);
147159
print_metric(result, "Cache misses", |m| m.cache_misses);
148160
}

0 commit comments

Comments
 (0)