Add a command to profile a runtime benchmark #1691

Merged · 5 commits · Aug 7, 2023
9 changes: 9 additions & 0 deletions collector/README.md
@@ -475,6 +475,15 @@ profilers whose results are not affected by system noise (e.g. `callgrind` or `e
`RUST_LOG=debug` can be specified to enable verbose logging, which is useful
for debugging `collector` itself.

## Profiling runtime benchmarks
It is also possible to profile runtime benchmarks using the following command:

```
./target/release/collector profile_runtime <PROFILER> <RUSTC> <BENCHMARK_NAME>
```

Currently, `<PROFILER>` can be `cachegrind`, which runs the runtime benchmark under `Cachegrind`.
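
For example, to profile a hypothetical benchmark called `my-benchmark` with a locally built `rustc` (both the benchmark name and the toolchain path below are illustrative):

```
./target/release/collector profile_runtime cachegrind /path/to/rustc my-benchmark
```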

## How `rustc` wrapping works
When a crate is benchmarked or profiled, the real `rustc` is replaced with the `rustc-fake` binary,
45 changes: 36 additions & 9 deletions collector/benchlib/src/benchmark.rs
@@ -1,9 +1,11 @@
use crate::cli::{parse_cli, Args, BenchmarkArgs};
use crate::cli::{parse_cli, Args, BenchmarkArgs, ProfileArgs};
use crate::comm::messages::{BenchmarkMessage, BenchmarkResult, BenchmarkStats};
use crate::comm::output_message;
use crate::measure::benchmark_function;
use crate::process::raise_process_priority;
use crate::profile::profile_function;
use std::collections::HashMap;
use std::rc::Rc;

/// Create and run a new benchmark group. Use the closure argument to register
/// the individual benchmarks.
@@ -18,12 +20,21 @@ where
group.run().expect("Benchmark group execution has failed");
}

/// Type-erased function that executes a single benchmark.
/// Type-erased function that executes a single benchmark and measures counter and wall-time
/// metrics.
type BenchmarkFn<'a> = Box<dyn Fn() -> anyhow::Result<BenchmarkStats> + 'a>;

/// Type-erased function that executes a single benchmark once.
type ProfileFn<'a> = Box<dyn Fn() + 'a>;

struct BenchmarkProfileFns<'a> {
benchmark_fn: BenchmarkFn<'a>,
profile_fn: ProfileFn<'a>,
}

#[derive(Default)]
pub struct BenchmarkGroup<'a> {
benchmarks: HashMap<&'static str, BenchmarkFn<'a>>,
benchmarks: HashMap<&'static str, BenchmarkProfileFns<'a>>,
}

impl<'a> BenchmarkGroup<'a> {
@@ -40,8 +51,13 @@ impl<'a> BenchmarkGroup<'a> {
Bench: FnOnce() -> R,
{
// We want to type-erase the target `func` by wrapping it in a Box.
let benchmark_fn = Box::new(move || benchmark_function(&constructor));
if self.benchmarks.insert(name, benchmark_fn).is_some() {
let constructor = Rc::new(constructor);
let constructor2 = constructor.clone();
let benchmark_fns = BenchmarkProfileFns {
benchmark_fn: Box::new(move || benchmark_function(constructor.as_ref())),
profile_fn: Box::new(move || profile_function(constructor2.as_ref())),
};
if self.benchmarks.insert(name, benchmark_fns).is_some() {
panic!("Benchmark '{}' was registered twice", name);
}
}
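
The `Rc` here deserves a note: both type-erased closures must own the benchmark constructor, yet a captured value can be moved into only one `move` closure, so the constructor is shared via `Rc` and each closure calls it through its own handle. A minimal standalone sketch of the same pattern (all names and types below are illustrative, not part of the PR):

```rust
use std::rc::Rc;

/// Wraps one constructor closure into two independently callable,
/// type-erased closures, mirroring what `BenchmarkProfileFns` stores.
fn split_constructor<F>(constructor: F) -> (Box<dyn Fn() -> u32>, Box<dyn Fn() -> u32>)
where
    F: Fn() -> u32 + 'static,
{
    // Shared ownership lets both boxed closures invoke the same constructor.
    let constructor = Rc::new(constructor);
    let constructor2 = constructor.clone();
    (
        Box::new(move || (constructor.as_ref())()),
        Box::new(move || (constructor2.as_ref())()),
    )
}

fn main() {
    let (benchmark_fn, profile_fn) = split_constructor(|| 42);
    assert_eq!(benchmark_fn(), profile_fn());
}
```
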
@@ -56,14 +72,15 @@ impl<'a> BenchmarkGroup<'a> {
Args::Run(args) => {
self.run_benchmarks(args)?;
}
Args::Profile(args) => self.profile_benchmark(args)?,
Args::List => self.list_benchmarks()?,
}

Ok(())
}

fn run_benchmarks(self, args: BenchmarkArgs) -> anyhow::Result<()> {
let mut items: Vec<(&'static str, BenchmarkFn)> = self
let mut items: Vec<(&'static str, BenchmarkProfileFns)> = self
.benchmarks
.into_iter()
.filter(|(name, _)| {
@@ -74,17 +91,17 @@

let mut stdout = std::io::stdout().lock();

for (name, benchmark_fn) in items {
for (name, benchmark_fns) in items {
let mut stats: Vec<BenchmarkStats> = Vec::with_capacity(args.iterations as usize);
// Warm-up
for _ in 0..3 {
let benchmark_stats = benchmark_fn()?;
let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
black_box(benchmark_stats);
}

// Actual measurement
for i in 0..args.iterations {
let benchmark_stats = benchmark_fn()?;
let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
log::info!("Benchmark (run {i}) `{name}` completed: {benchmark_stats:?}");
stats.push(benchmark_stats);
}
@@ -100,6 +117,16 @@
Ok(())
}

fn profile_benchmark(self, args: ProfileArgs) -> anyhow::Result<()> {
let Some(benchmark) = self.benchmarks.get(args.benchmark.as_str()) else {
return Err(anyhow::anyhow!("Benchmark `{}` not found. Available benchmarks: {}", args.benchmark,
self.benchmarks.keys().map(|s| s.to_string()).collect::<Vec<_>>().join(", ")));
};
(benchmark.profile_fn)();

Ok(())
}

fn list_benchmarks(self) -> anyhow::Result<()> {
let benchmark_list: Vec<&str> = self.benchmarks.into_keys().collect();
serde_json::to_writer(std::io::stdout(), &benchmark_list)?;
8 changes: 8 additions & 0 deletions collector/benchlib/src/cli.rs
@@ -4,6 +4,8 @@ use clap::{CommandFactory, FromArgMatches};
pub enum Args {
/// Benchmark all benchmarks in this benchmark group and print the results as JSON.
Run(BenchmarkArgs),
/// Profile a single benchmark execution.
Profile(ProfileArgs),
/// List benchmarks that are defined in the current group as a JSON array.
List,
}
@@ -23,6 +25,12 @@ pub struct BenchmarkArgs {
pub include: Option<String>,
}

#[derive(clap::Parser, Debug)]
pub struct ProfileArgs {
/// Name of the benchmark that should be profiled.
pub benchmark: String,
}
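
Given the derive above, each compiled benchmark group binary should gain a profiling subcommand alongside `Run` and `List`. Assuming clap's default kebab-case subcommand naming and an illustrative binary/benchmark name (both are assumptions, not taken from the PR), a manual invocation would look roughly like:

```
./target/release/<benchmark-group-binary> profile my-benchmark
```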

#[test]
fn verify_cli() {
// By default, clap lazily checks subcommands. This provides eager testing
1 change: 1 addition & 0 deletions collector/benchlib/src/lib.rs
@@ -18,6 +18,7 @@ mod cli;
pub mod comm;
pub mod measure;
pub mod process;
mod profile;
mod utils;

#[cfg(feature = "compression")]
4 changes: 4 additions & 0 deletions collector/benchlib/src/profile.rs
@@ -0,0 +1,4 @@
/// Runs the benchmark exactly once, without taking any measurements, so that an
/// external profiler (e.g. Cachegrind) can observe the execution.
pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
let func = benchmark_constructor();
func();
}
76 changes: 61 additions & 15 deletions collector/src/bin/collector.rs
@@ -10,7 +10,7 @@ use collector::compile::benchmark::scenario::Scenario;
use collector::compile::benchmark::{
compile_benchmark_dir, get_compile_benchmarks, ArtifactType, Benchmark, BenchmarkName,
};
use collector::{runtime, utils, CollectorCtx, CollectorStepBuilder};
use collector::{utils, CollectorCtx, CollectorStepBuilder};
use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Pool};
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use std::cmp::Ordering;
@@ -31,9 +31,11 @@ use tokio::runtime::Runtime;
use collector::compile::execute::bencher::BenchProcessor;
use collector::compile::execute::profiler::{ProfileProcessor, Profiler};
use collector::runtime::{
bench_runtime, runtime_benchmark_dir, BenchmarkFilter, BenchmarkSuite,
BenchmarkSuiteCompilation, CargoIsolationMode, DEFAULT_RUNTIME_ITERATIONS,
bench_runtime, prepare_runtime_benchmark_suite, runtime_benchmark_dir, BenchmarkFilter,
BenchmarkSuite, BenchmarkSuiteCompilation, CargoIsolationMode, RuntimeProfiler,
DEFAULT_RUNTIME_ITERATIONS,
};
use collector::runtime::{profile_runtime, RuntimeCompilationOpts};
use collector::toolchain::{
create_toolchain_from_published_version, get_local_toolchain, Sysroot, Toolchain,
};
@@ -245,7 +247,7 @@ fn cg_annotate(cgout: &Path, path: &Path) -> anyhow::Result<()> {
}

#[allow(clippy::too_many_arguments)]
fn profile(
fn profile_compile(
toolchain: &Toolchain,
profiler: Profiler,
out_dir: &Path,
@@ -492,6 +494,19 @@ enum Commands {
#[arg(long = "no-isolate")]
no_isolate: bool,
},

/// Profiles a runtime benchmark.
ProfileRuntime {
/// Profiler to use
profiler: RuntimeProfiler,

/// The path to the local rustc used to compile the runtime benchmark
rustc: String,

/// Name of the benchmark that should be profiled
benchmark: String,
},

/// Benchmarks a local rustc
BenchLocal {
#[command(flatten)]
@@ -640,15 +655,7 @@ fn main_result() -> anyhow::Result<i32> {
no_isolate,
} => {
log_db(&db);
let toolchain = get_local_toolchain(
&[Profile::Opt],
&local.rustc,
None,
local.cargo.as_deref(),
local.id.as_deref(),
"",
target_triple,
)?;
let toolchain = get_local_toolchain_for_runtime_benchmarks(&local, &target_triple)?;
let pool = Pool::open(&db.db);

let isolation_mode = if no_isolate {
@@ -679,6 +686,25 @@
run_benchmarks(&mut rt, conn, shared, None, Some(config))?;
Ok(0)
}
Commands::ProfileRuntime {
profiler,
rustc,
benchmark,
} => {
let toolchain =
get_local_toolchain(&[Profile::Opt], &rustc, None, None, None, "", target_triple)?;
let suite = prepare_runtime_benchmark_suite(
&toolchain,
&runtime_benchmark_dir,
CargoIsolationMode::Cached,
// Compile with debuginfo to have filenames and line numbers available in the
// generated profiles.
RuntimeCompilationOpts::default().debug_info("1"),
)?
.suite;
profile_runtime(profiler, suite, &benchmark)?;
Ok(0)
}
Commands::BenchLocal {
local,
opts,
@@ -894,7 +920,7 @@
target_triple.clone(),
)?;
let id = toolchain.id.clone();
profile(
profile_compile(
&toolchain,
profiler,
&out_dir,
@@ -995,6 +1021,21 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
}
}

fn get_local_toolchain_for_runtime_benchmarks(
local: &LocalOptions,
target_triple: &str,
) -> anyhow::Result<Toolchain> {
get_local_toolchain(
&[Profile::Opt],
&local.rustc,
None,
local.cargo.as_deref(),
local.id.as_deref(),
"",
target_triple.to_string(),
)
}

async fn load_runtime_benchmarks(
conn: &mut dyn Connection,
benchmark_dir: &Path,
@@ -1005,7 +1046,12 @@ async fn load_runtime_benchmarks(
let BenchmarkSuiteCompilation {
suite,
failed_to_compile,
} = runtime::prepare_runtime_benchmark_suite(toolchain, benchmark_dir, isolation_mode)?;
} = prepare_runtime_benchmark_suite(
toolchain,
benchmark_dir,
isolation_mode,
RuntimeCompilationOpts::default(),
)?;

record_runtime_compilation_errors(conn, artifact_id, failed_to_compile).await;
Ok(suite)
29 changes: 28 additions & 1 deletion collector/src/runtime/benchmark.rs
@@ -59,6 +59,15 @@ impl BenchmarkSuite {
.iter()
.flat_map(|suite| suite.benchmark_names.iter().map(|n| n.as_ref()))
}

pub fn get_group_by_benchmark(&self, benchmark: &str) -> Option<&BenchmarkGroup> {
self.groups.iter().find(|group| {
group
.benchmark_names
.iter()
.any(|b| b.as_str() == benchmark)
})
}
}

pub struct BenchmarkFilter {
@@ -97,6 +106,18 @@ pub struct BenchmarkSuiteCompilation {
pub failed_to_compile: HashMap<String, String>,
}

#[derive(Default)]
pub struct RuntimeCompilationOpts {
debug_info: Option<String>,
}

impl RuntimeCompilationOpts {
pub fn debug_info(mut self, debug_info: &str) -> Self {
self.debug_info = Some(debug_info.to_string());
self
}
}

/// Find all runtime benchmark crates in `benchmark_dir` and compile them.
/// We assume that each binary defines a benchmark suite using `benchlib`.
/// We then execute each benchmark suite with the `list-benchmarks` command to find out its
@@ -105,6 +126,7 @@ pub fn prepare_runtime_benchmark_suite(
toolchain: &Toolchain,
benchmark_dir: &Path,
isolation_mode: CargoIsolationMode,
opts: RuntimeCompilationOpts,
) -> anyhow::Result<BenchmarkSuiteCompilation> {
let benchmark_crates = get_runtime_benchmark_groups(benchmark_dir)?;

@@ -137,7 +159,7 @@

let target_dir = temp_dir.as_ref().map(|d| d.path());

let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir)
let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir, &opts)
.with_context(|| {
anyhow::anyhow!("Cannot start compilation of {}", benchmark_crate.name)
})
@@ -267,6 +289,7 @@ fn start_cargo_build(
toolchain: &Toolchain,
benchmark_dir: &Path,
target_dir: Option<&Path>,
opts: &RuntimeCompilationOpts,
) -> anyhow::Result<Child> {
let mut command = Command::new(&toolchain.components.cargo);
command
Expand All @@ -280,6 +303,10 @@ fn start_cargo_build(
.stdout(Stdio::piped())
.stderr(Stdio::null());

if let Some(ref debug_info) = opts.debug_info {
command.env("CARGO_PROFILE_RELEASE_DEBUG", debug_info);
}

if let Some(target_dir) = target_dir {
command.arg("--target-dir");
command.arg(target_dir);
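
As an aside, `CARGO_PROFILE_RELEASE_DEBUG` is Cargo's environment-variable override for the `profile.release.debug` manifest key, so the debuginfo setting used above can be reproduced by hand when building a benchmark crate directly (an illustrative invocation, not taken from this PR):

```
CARGO_PROFILE_RELEASE_DEBUG=1 cargo build --release
```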