Add a command to profile a runtime benchmark #1691

Merged · 5 commits · Aug 7, 2023
9 changes: 9 additions & 0 deletions collector/README.md
@@ -475,6 +475,15 @@ profilers whose results are not affected by system noise (e.g. `callgrind` or `e
`RUST_LOG=debug` can be specified to enable verbose logging, which is useful
for debugging `collector` itself.

## Profiling runtime benchmarks
It is also possible to profile runtime benchmarks using the following command:

```
./target/release/collector profile_runtime <PROFILER> <RUSTC> <BENCHMARK_NAME>
```

Currently, `<PROFILER>` can be `cachegrind`, which runs the runtime benchmark under `Cachegrind`.
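
For example, to profile a hypothetical benchmark called `my-benchmark` with a locally built `rustc` (both the benchmark name and the toolchain path below are illustrative):

```
./target/release/collector profile_runtime cachegrind /path/to/rustc my-benchmark
```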

## How `rustc` wrapping works
When a crate is benchmarked or profiled, the real `rustc` is replaced with the `rustc-fake` binary,
45 changes: 36 additions & 9 deletions collector/benchlib/src/benchmark.rs
@@ -1,9 +1,11 @@
use crate::cli::{parse_cli, Args, BenchmarkArgs};
use crate::cli::{parse_cli, Args, BenchmarkArgs, ProfileArgs};
use crate::comm::messages::{BenchmarkMessage, BenchmarkResult, BenchmarkStats};
use crate::comm::output_message;
use crate::measure::benchmark_function;
use crate::process::raise_process_priority;
use crate::profile::profile_function;
use std::collections::HashMap;
use std::rc::Rc;

/// Create and run a new benchmark group. Use the closure argument to register
/// the individual benchmarks.
@@ -18,12 +20,21 @@ where
group.run().expect("Benchmark group execution has failed");
}

/// Type-erased function that executes a single benchmark.
/// Type-erased function that executes a single benchmark and measures counter and wall-time
/// metrics.
type BenchmarkFn<'a> = Box<dyn Fn() -> anyhow::Result<BenchmarkStats> + 'a>;

/// Type-erased function that executes a single benchmark once.
type ProfileFn<'a> = Box<dyn Fn() + 'a>;

struct BenchmarkProfileFns<'a> {
benchmark_fn: BenchmarkFn<'a>,
profile_fn: ProfileFn<'a>,
}

#[derive(Default)]
pub struct BenchmarkGroup<'a> {
benchmarks: HashMap<&'static str, BenchmarkFn<'a>>,
benchmarks: HashMap<&'static str, BenchmarkProfileFns<'a>>,
}

impl<'a> BenchmarkGroup<'a> {
@@ -40,8 +51,13 @@ impl<'a> BenchmarkGroup<'a> {
Bench: FnOnce() -> R,
{
// We want to type-erase the target `func` by wrapping it in a Box.
let benchmark_fn = Box::new(move || benchmark_function(&constructor));
if self.benchmarks.insert(name, benchmark_fn).is_some() {
let constructor = Rc::new(constructor);
let constructor2 = constructor.clone();
let benchmark_fns = BenchmarkProfileFns {
benchmark_fn: Box::new(move || benchmark_function(constructor.as_ref())),
profile_fn: Box::new(move || profile_function(constructor2.as_ref())),
};
if self.benchmarks.insert(name, benchmark_fns).is_some() {
panic!("Benchmark '{}' was registered twice", name);
}
}
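
The `Rc` here deserves a note: both type-erased closures must own the benchmark constructor, yet a captured value can be moved into only one `move` closure, so the constructor is shared via `Rc` and each closure calls it through its own handle. A minimal standalone sketch of the same pattern (all names and types below are illustrative, not part of the PR):

```rust
use std::rc::Rc;

/// Wraps one constructor closure into two independently callable,
/// type-erased closures, mirroring what `BenchmarkProfileFns` stores.
fn split_constructor<F>(constructor: F) -> (Box<dyn Fn() -> u32>, Box<dyn Fn() -> u32>)
where
    F: Fn() -> u32 + 'static,
{
    // Shared ownership lets both boxed closures invoke the same constructor.
    let constructor = Rc::new(constructor);
    let constructor2 = constructor.clone();
    (
        Box::new(move || (constructor.as_ref())()),
        Box::new(move || (constructor2.as_ref())()),
    )
}

fn main() {
    let (benchmark_fn, profile_fn) = split_constructor(|| 42);
    assert_eq!(benchmark_fn(), profile_fn());
}
```
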
@@ -56,14 +72,15 @@ impl<'a> BenchmarkGroup<'a> {
Args::Run(args) => {
self.run_benchmarks(args)?;
}
Args::Profile(args) => self.profile_benchmark(args)?,
Args::List => self.list_benchmarks()?,
}

Ok(())
}

fn run_benchmarks(self, args: BenchmarkArgs) -> anyhow::Result<()> {
let mut items: Vec<(&'static str, BenchmarkFn)> = self
let mut items: Vec<(&'static str, BenchmarkProfileFns)> = self
.benchmarks
.into_iter()
.filter(|(name, _)| {
@@ -74,17 +91,17 @@

let mut stdout = std::io::stdout().lock();

for (name, benchmark_fn) in items {
for (name, benchmark_fns) in items {
let mut stats: Vec<BenchmarkStats> = Vec::with_capacity(args.iterations as usize);
// Warm-up
for _ in 0..3 {
let benchmark_stats = benchmark_fn()?;
let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
black_box(benchmark_stats);
}

// Actual measurement
for i in 0..args.iterations {
let benchmark_stats = benchmark_fn()?;
let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
log::info!("Benchmark (run {i}) `{name}` completed: {benchmark_stats:?}");
stats.push(benchmark_stats);
}
@@ -100,6 +117,16 @@
Ok(())
}

fn profile_benchmark(self, args: ProfileArgs) -> anyhow::Result<()> {
let Some(benchmark) = self.benchmarks.get(args.benchmark.as_str()) else {
return Err(anyhow::anyhow!("Benchmark `{}` not found. Available benchmarks: {}", args.benchmark,
self.benchmarks.keys().map(|s| s.to_string()).collect::<Vec<_>>().join(", ")));
};
(benchmark.profile_fn)();

Ok(())
}

fn list_benchmarks(self) -> anyhow::Result<()> {
let benchmark_list: Vec<&str> = self.benchmarks.into_keys().collect();
serde_json::to_writer(std::io::stdout(), &benchmark_list)?;
8 changes: 8 additions & 0 deletions collector/benchlib/src/cli.rs
@@ -4,6 +4,8 @@ use clap::{CommandFactory, FromArgMatches};
pub enum Args {
/// Benchmark all benchmarks in this benchmark group and print the results as JSON.
Run(BenchmarkArgs),
/// Profile a single benchmark execution.
Profile(ProfileArgs),
/// List benchmarks that are defined in the current group as a JSON array.
List,
}
@@ -23,6 +25,12 @@ pub struct BenchmarkArgs {
pub include: Option<String>,
}

#[derive(clap::Parser, Debug)]
pub struct ProfileArgs {
/// Name of the benchmark that should be profiled.
pub benchmark: String,
}
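
Given the derive above, each compiled benchmark group binary should gain a profiling subcommand alongside `Run` and `List`. Assuming clap's default kebab-case subcommand naming and an illustrative binary/benchmark name (both are assumptions, not taken from the PR), a manual invocation would look roughly like:

```
./target/release/<benchmark-group-binary> profile my-benchmark
```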

#[test]
fn verify_cli() {
// By default, clap lazily checks subcommands. This provides eager testing
1 change: 1 addition & 0 deletions collector/benchlib/src/lib.rs
@@ -18,6 +18,7 @@ mod cli;
pub mod comm;
pub mod measure;
pub mod process;
mod profile;
mod utils;

#[cfg(feature = "compression")]
4 changes: 4 additions & 0 deletions collector/benchlib/src/profile.rs
@@ -0,0 +1,4 @@
/// Runs the benchmark exactly once, without taking any measurements, so that an
/// external profiler (e.g. Cachegrind) can observe the execution.
pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
let func = benchmark_constructor();
func();
}
76 changes: 61 additions & 15 deletions collector/src/bin/collector.rs
@@ -10,7 +10,7 @@ use collector::compile::benchmark::scenario::Scenario;
use collector::compile::benchmark::{
compile_benchmark_dir, get_compile_benchmarks, ArtifactType, Benchmark, BenchmarkName,
};
use collector::{runtime, utils, CollectorCtx, CollectorStepBuilder};
use collector::{utils, CollectorCtx, CollectorStepBuilder};
use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Pool};
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use std::cmp::Ordering;
@@ -31,9 +31,11 @@ use tokio::runtime::Runtime;
use collector::compile::execute::bencher::BenchProcessor;
use collector::compile::execute::profiler::{ProfileProcessor, Profiler};
use collector::runtime::{
bench_runtime, runtime_benchmark_dir, BenchmarkFilter, BenchmarkSuite,
BenchmarkSuiteCompilation, CargoIsolationMode, DEFAULT_RUNTIME_ITERATIONS,
bench_runtime, prepare_runtime_benchmark_suite, runtime_benchmark_dir, BenchmarkFilter,
BenchmarkSuite, BenchmarkSuiteCompilation, CargoIsolationMode, RuntimeProfiler,
DEFAULT_RUNTIME_ITERATIONS,
};
use collector::runtime::{profile_runtime, RuntimeCompilationOpts};
use collector::toolchain::{
create_toolchain_from_published_version, get_local_toolchain, Sysroot, Toolchain,
};
@@ -245,7 +247,7 @@ fn cg_annotate(cgout: &Path, path: &Path) -> anyhow::Result<()> {
}

#[allow(clippy::too_many_arguments)]
fn profile(
fn profile_compile(
toolchain: &Toolchain,
profiler: Profiler,
out_dir: &Path,
@@ -492,6 +494,19 @@ enum Commands {
#[arg(long = "no-isolate")]
no_isolate: bool,
},

/// Profiles a runtime benchmark.
ProfileRuntime {
/// Profiler to use
profiler: RuntimeProfiler,

/// The path to the local rustc used to compile the runtime benchmark
rustc: String,

/// Name of the benchmark that should be profiled
benchmark: String,
},

/// Benchmarks a local rustc
BenchLocal {
#[command(flatten)]
@@ -640,15 +655,7 @@ fn main_result() -> anyhow::Result<i32> {
no_isolate,
} => {
log_db(&db);
let toolchain = get_local_toolchain(
&[Profile::Opt],
&local.rustc,
None,
local.cargo.as_deref(),
local.id.as_deref(),
"",
target_triple,
)?;
let toolchain = get_local_toolchain_for_runtime_benchmarks(&local, &target_triple)?;
let pool = Pool::open(&db.db);

let isolation_mode = if no_isolate {
@@ -679,6 +686,25 @@
run_benchmarks(&mut rt, conn, shared, None, Some(config))?;
Ok(0)
}
Commands::ProfileRuntime {
profiler,
rustc,
benchmark,
} => {
let toolchain =
get_local_toolchain(&[Profile::Opt], &rustc, None, None, None, "", target_triple)?;
let suite = prepare_runtime_benchmark_suite(
&toolchain,
&runtime_benchmark_dir,
CargoIsolationMode::Cached,
// Compile with debuginfo to have filenames and line numbers available in the
// generated profiles.
RuntimeCompilationOpts::default().debug_info("1"),
)?
.suite;
profile_runtime(profiler, suite, &benchmark)?;
Ok(0)
}
Commands::BenchLocal {
local,
opts,
@@ -894,7 +920,7 @@
target_triple.clone(),
)?;
let id = toolchain.id.clone();
profile(
profile_compile(
&toolchain,
profiler,
&out_dir,
@@ -995,6 +1021,21 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
}
}

fn get_local_toolchain_for_runtime_benchmarks(
local: &LocalOptions,
target_triple: &str,
) -> anyhow::Result<Toolchain> {
get_local_toolchain(
&[Profile::Opt],
&local.rustc,
None,
local.cargo.as_deref(),
local.id.as_deref(),
"",
target_triple.to_string(),
)
}

async fn load_runtime_benchmarks(
conn: &mut dyn Connection,
benchmark_dir: &Path,
@@ -1005,7 +1046,12 @@ async fn load_runtime_benchmarks(
let BenchmarkSuiteCompilation {
suite,
failed_to_compile,
} = runtime::prepare_runtime_benchmark_suite(toolchain, benchmark_dir, isolation_mode)?;
} = prepare_runtime_benchmark_suite(
toolchain,
benchmark_dir,
isolation_mode,
RuntimeCompilationOpts::default(),
)?;

record_runtime_compilation_errors(conn, artifact_id, failed_to_compile).await;
Ok(suite)
29 changes: 28 additions & 1 deletion collector/src/runtime/benchmark.rs
@@ -59,6 +59,15 @@ impl BenchmarkSuite {
.iter()
.flat_map(|suite| suite.benchmark_names.iter().map(|n| n.as_ref()))
}

pub fn get_group_by_benchmark(&self, benchmark: &str) -> Option<&BenchmarkGroup> {
self.groups.iter().find(|group| {
group
.benchmark_names
.iter()
.any(|b| b.as_str() == benchmark)
})
}
}

pub struct BenchmarkFilter {
@@ -97,6 +106,18 @@ pub struct BenchmarkSuiteCompilation {
pub failed_to_compile: HashMap<String, String>,
}

#[derive(Default)]
pub struct RuntimeCompilationOpts {
debug_info: Option<String>,
}

impl RuntimeCompilationOpts {
pub fn debug_info(mut self, debug_info: &str) -> Self {
self.debug_info = Some(debug_info.to_string());
self
}
}

/// Find all runtime benchmark crates in `benchmark_dir` and compile them.
/// We assume that each binary defines a benchmark suite using `benchlib`.
/// We then execute each benchmark suite with the `list-benchmarks` command to find out its
@@ -105,6 +126,7 @@ pub fn prepare_runtime_benchmark_suite(
toolchain: &Toolchain,
benchmark_dir: &Path,
isolation_mode: CargoIsolationMode,
opts: RuntimeCompilationOpts,
) -> anyhow::Result<BenchmarkSuiteCompilation> {
let benchmark_crates = get_runtime_benchmark_groups(benchmark_dir)?;

@@ -137,7 +159,7 @@

let target_dir = temp_dir.as_ref().map(|d| d.path());

let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir)
let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir, &opts)
.with_context(|| {
anyhow::anyhow!("Cannot start compilation of {}", benchmark_crate.name)
})
@@ -267,6 +289,7 @@ fn start_cargo_build(
toolchain: &Toolchain,
benchmark_dir: &Path,
target_dir: Option<&Path>,
opts: &RuntimeCompilationOpts,
) -> anyhow::Result<Child> {
let mut command = Command::new(&toolchain.components.cargo);
command
Expand All @@ -280,6 +303,10 @@ fn start_cargo_build(
.stdout(Stdio::piped())
.stderr(Stdio::null());

if let Some(ref debug_info) = opts.debug_info {
command.env("CARGO_PROFILE_RELEASE_DEBUG", debug_info);
}

if let Some(target_dir) = target_dir {
command.arg("--target-dir");
command.arg(target_dir);
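
As an aside, `CARGO_PROFILE_RELEASE_DEBUG` is Cargo's environment-variable override for the `profile.release.debug` manifest key, so the debuginfo setting used above can be reproduced by hand when building a benchmark crate directly (an illustrative invocation, not taken from this PR):

```
CARGO_PROFILE_RELEASE_DEBUG=1 cargo build --release
```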