Skip to content

Commit 1eb2656

Browse files
committed
Enable precise Cachegrind collection mode
1 parent ce83d3c commit 1eb2656

File tree

7 files changed

+54
-1
lines changed

7 files changed

+54
-1
lines changed

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

collector/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,9 @@ benchlib = { path = "benchlib" }
4343
[target.'cfg(windows)'.dependencies]
4444
miow = "0.3"
4545
windows-sys = { version = "0.36.1", features = ["Win32_Foundation"] }
46+
47+
[features]
48+
# Enable more precise Cachegrind profiles for runtime benchmarks.
49+
# Requires Valgrind 3.22 or newer to be installed.
50+
# Set the DEP_VALGRIND=<path-to-valgrind>/include environment variable when building.
51+
precise-cachegrind = []

collector/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,15 @@ It is also possible to profile runtime benchmarks using the following command:
488488
```
489489

490490
Currently, a `<PROFILER>` can be `cachegrind`, which will run the runtime benchmark under
491-
`Cachegrind`.
491+
`Cachegrind`. If you pass `--features precise-cachegrind`, you can get more precise profiling results.
492+
In this mode, Cachegrind will only record the instructions of the actual benchmark, and ignore any
493+
other code (e.g. benchmark initialization). To use this mode, you need to provide the path to the `include`
494+
directory of a Valgrind installation (at least Valgrind 3.22 is required) in the `DEP_VALGRIND` environment variable, like this:
495+
496+
```
497+
DEP_VALGRIND=<path-to-valgrind-install>/include cargo run --release --bin collector \
498+
--features precise-cachegrind profile_runtime cachegrind <RUSTC> <BENCHMARK_NAME>
499+
```
492500

493501
## Codegen diff
494502
You can use the `codegen_diff` command to display the assembly, LLVM IR or MIR difference between two

collector/benchlib/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ env_logger = "0.10.0"
1515
clap = { version = "4.1", features = ["derive", "string"] }
1616
libc = "0.2"
1717
flate2 = { version = "1", optional = true }
18+
crabgrind = { version = "0.1.10", optional = true }
1819

1920
[target.'cfg(target_os = "linux")'.dependencies]
2021
perf-event = "0.4.7"
2122

2223
[features]
2324
compression = ["dep:flate2"]
25+
precise-cachegrind = ["dep:crabgrind"]

collector/benchlib/src/profile.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,17 @@
11
pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
22
let func = benchmark_constructor();
3+
4+
// With the `precise-cachegrind` feature, we want to enable cachegrind recording
5+
// only for the actual execution of the profiled function.
6+
#[cfg(feature = "precise-cachegrind")]
7+
{
8+
crabgrind::cachegrind::start_instrumentation();
9+
}
10+
311
func();
12+
13+
#[cfg(feature = "precise-cachegrind")]
14+
{
15+
crabgrind::cachegrind::stop_instrumentation();
16+
}
417
}

collector/src/runtime/benchmark.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,10 @@ fn start_cargo_build(
358358
command.arg(target_dir);
359359
}
360360

361+
// Enable the precise-cachegrind feature for the benchlib dependency of the runtime group.
362+
#[cfg(feature = "precise-cachegrind")]
363+
command.arg("--features").arg("benchlib/precise-cachegrind");
364+
361365
let child = command
362366
.spawn()
363367
.map_err(|error| anyhow::anyhow!("Failed to start cargo: {:?}", error))?;

collector/src/runtime/profile.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ pub fn profile_runtime(
3939
.arg("--branch-sim=no")
4040
.arg("--cache-sim=no")
4141
.arg(format!("--cachegrind-out-file={}", cgout_tmp.display()));
42+
43+
// Disable cachegrind profile collection at start.
44+
// It will be enabled only for the profiled function using
45+
// Valgrind client requests (see `benchlib/src/profile.rs`).
46+
#[cfg(feature = "precise-cachegrind")]
47+
cmd.arg("--instr-at-start=no");
48+
4249
cmd.stdin(Stdio::null());
4350
cmd.arg(&group.binary).arg("profile").arg(benchmark);
4451
command_output(&mut cmd).context("Cannot run profiler")?;

0 commit comments

Comments
 (0)