Skip to content

Commit c19ca6a

Browse files
authored
Merge pull request #1743 from rust-lang/precise-cachegrind
Add precise Cachegrind profiling mode
2 parents ce83d3c + c8b1440 commit c19ca6a

File tree

7 files changed

+64
-2
lines changed

7 files changed

+64
-2
lines changed

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

collector/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,9 @@ benchlib = { path = "benchlib" }
4343
[target.'cfg(windows)'.dependencies]
4444
miow = "0.3"
4545
windows-sys = { version = "0.36.1", features = ["Win32_Foundation"] }
46+
47+
[features]
48+
# Enable more precise Cachegrind profiles for runtime benchmarks.
49+
# Requires a recent Valgrind to be installed.
50+
# Pass the DEP_VALGRIND=<path-to-valgrind-install>/include environment variable when building.
51+
precise-cachegrind = []

collector/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,15 @@ It is also possible to profile runtime benchmarks using the following command:
488488
```
489489

490490
Currently, a `<PROFILER>` can be `cachegrind`, which will run the runtime benchmark under
491-
`Cachegrind`.
491+
`Cachegrind`. If you pass `--features precise-cachegrind`, you can get more precise profiling results.
492+
In this mode, Cachegrind will only record the instructions of the actual benchmark, and ignore any
493+
other code (e.g. benchmark initialization). To use this mode, you need to provide a path to a Valgrind
494+
installation's `include` directory via the `DEP_VALGRIND` environment variable (at least Valgrind 3.22 is required), like this:
495+
496+
```
497+
DEP_VALGRIND=<path-to-valgrind-install>/include cargo run --release --bin collector \
498+
--features precise-cachegrind profile_runtime cachegrind <RUSTC> <BENCHMARK_NAME>
499+
```
492500

493501
## Codegen diff
494502
You can use the `codegen_diff` command to display the assembly, LLVM IR or MIR difference between two

collector/benchlib/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ env_logger = "0.10.0"
1515
clap = { version = "4.1", features = ["derive", "string"] }
1616
libc = "0.2"
1717
flate2 = { version = "1", optional = true }
18+
crabgrind = { version = "0.1.10", optional = true }
1819

1920
[target.'cfg(target_os = "linux")'.dependencies]
2021
perf-event = "0.4.7"
2122

2223
[features]
2324
compression = ["dep:flate2"]
25+
precise-cachegrind = ["dep:crabgrind"]

collector/benchlib/src/profile.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,17 @@
11
pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
22
let func = benchmark_constructor();
3+
4+
// With the `precise-cachegrind` feature, we want to enable cachegrind recording
5+
// only for the actual execution of the profiled function.
6+
#[cfg(feature = "precise-cachegrind")]
7+
{
8+
crabgrind::cachegrind::start_instrumentation();
9+
}
10+
311
func();
12+
13+
#[cfg(feature = "precise-cachegrind")]
14+
{
15+
crabgrind::cachegrind::stop_instrumentation();
16+
}
417
}

collector/src/runtime/benchmark.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,16 @@ pub struct BenchmarkSuiteCompilation {
135135

136136
impl BenchmarkSuiteCompilation {
137137
pub fn extract_suite(self) -> BenchmarkSuite {
138-
assert!(self.failed_to_compile.is_empty());
138+
use std::fmt::Write;
139+
140+
if !self.failed_to_compile.is_empty() {
141+
let mut message =
142+
"Cannot extract runtime suite because of compilation errors:\n".to_string();
143+
for (group, error) in self.failed_to_compile {
144+
writeln!(message, "{group}\n{error}\n").unwrap();
145+
}
146+
panic!("{message}");
147+
}
139148
self.suite
140149
}
141150
}
@@ -358,6 +367,10 @@ fn start_cargo_build(
358367
command.arg(target_dir);
359368
}
360369

370+
// Enable the precise-cachegrind feature for the benchlib dependency of the runtime group.
371+
#[cfg(feature = "precise-cachegrind")]
372+
command.arg("--features").arg("benchlib/precise-cachegrind");
373+
361374
let child = command
362375
.spawn()
363376
.map_err(|error| anyhow::anyhow!("Failed to start cargo: {:?}", error))?;

collector/src/runtime/profile.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ pub fn profile_runtime(
3939
.arg("--branch-sim=no")
4040
.arg("--cache-sim=no")
4141
.arg(format!("--cachegrind-out-file={}", cgout_tmp.display()));
42+
43+
// Disable cachegrind profile collection at start.
44+
// It will be enabled only for the profiled function using
45+
// Valgrind client requests (see `benchlib/src/profile.rs`).
46+
#[cfg(feature = "precise-cachegrind")]
47+
cmd.arg("--instr-at-start=no");
48+
4249
cmd.stdin(Stdio::null());
4350
cmd.arg(&group.binary).arg("profile").arg(benchmark);
4451
command_output(&mut cmd).context("Cannot run profiler")?;

0 commit comments

Comments
 (0)