Skip to content

Store artifact sizes into the DB #1657

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions collector/src/bin/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use collector::compile::benchmark::{
compile_benchmark_dir, get_compile_benchmarks, Benchmark, BenchmarkName,
};
use collector::{runtime, utils, CollectorCtx, CollectorStepBuilder};
use database::{ArtifactId, Commit, CommitType, Connection, Pool};
use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Pool};
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use std::cmp::Ordering;
use std::ffi::OsStr;
Expand Down Expand Up @@ -921,8 +921,8 @@ fn main_result() -> anyhow::Result<i32> {
sysroot.preserve(); // don't delete it

// Print the directory containing the toolchain.
sysroot.rustc.pop();
let s = format!("{:?}", sysroot.rustc);
sysroot.components.rustc.pop();
let s = format!("{:?}", sysroot.components.rustc);
println!("{}", &s[1..s.len() - 1]);

Ok(0)
Expand Down Expand Up @@ -978,6 +978,12 @@ fn run_benchmarks(
compile: Option<CompileBenchmarkConfig>,
runtime: Option<RuntimeBenchmarkConfig>,
) -> anyhow::Result<()> {
rt.block_on(record_toolchain_sizes(
connection.as_mut(),
&shared.artifact_id,
&shared.toolchain,
));

let collector = rt.block_on(init_collection(
connection.as_mut(),
&shared,
Expand Down Expand Up @@ -1190,6 +1196,38 @@ fn bench_compile(
errors
}

/// Measures the on-disk size of every known toolchain component (rustc, cargo, libLLVM, ...)
/// and stores each size in the database under the given artifact id.
///
/// Components whose path is unknown, or whose file metadata cannot be read, are skipped
/// without reporting an error.
async fn record_toolchain_sizes(
    conn: &mut dyn Connection,
    artifact_id: &ArtifactId,
    toolchain: &Toolchain,
) {
    let aid: ArtifactIdNumber = conn.artifact_id(artifact_id).await;
    let components = &toolchain.components;

    // Component name stored in the DB, paired with the file backing it (if known).
    // The order matches the order in which the sizes are recorded.
    let entries: [(&str, Option<&Path>); 7] = [
        ("rustc", Some(components.rustc.as_path())),
        ("rustdoc", components.rustdoc.as_deref()),
        ("cargo", Some(components.cargo.as_path())),
        ("librustc_driver", components.lib_rustc.as_deref()),
        ("libstd", components.lib_std.as_deref()),
        ("libtest", components.lib_test.as_deref()),
        ("libLLVM", components.lib_llvm.as_deref()),
    ];

    for &(component, path) in entries.iter() {
        // Best effort: nothing is recorded for a missing path or an unreadable file.
        let size = match path.map(fs::metadata) {
            Some(Ok(metadata)) => metadata.len(),
            _ => continue,
        };
        conn.record_artifact_size(aid, component, size).await;
    }
}

fn add_perf_config(directory: &Path, category: Category) {
let data = serde_json::json!({
"category": category.to_string()
Expand Down
6 changes: 3 additions & 3 deletions collector/src/compile/execute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ impl<'a> CargoProcess<'a> {
}

fn base_command(&self, cwd: &Path, subcommand: &str) -> Command {
let mut cmd = Command::new(Path::new(&self.toolchain.cargo));
let mut cmd = Command::new(Path::new(&self.toolchain.components.cargo));
cmd
// Not all cargo invocations (e.g. `cargo clean`) need all of these
// env vars set, but it doesn't hurt to have them.
.env("RUSTC", &*FAKE_RUSTC)
.env("RUSTC_REAL", &self.toolchain.rustc)
.env("RUSTC_REAL", &self.toolchain.components.rustc)
// We separately pass -Cincremental to the leaf crate --
// CARGO_INCREMENTAL is cached separately for both the leaf crate
// and any in-tree dependencies, and we don't want that; it wastes
Expand All @@ -164,7 +164,7 @@ impl<'a> CargoProcess<'a> {
.arg("--manifest-path")
.arg(&self.manifest_path);

if let Some(r) = &self.toolchain.rustdoc {
if let Some(r) = &self.toolchain.components.rustdoc {
cmd.env("RUSTDOC", &*FAKE_RUSTDOC).env("RUSTDOC_REAL", r);
}
cmd
Expand Down
6 changes: 3 additions & 3 deletions collector/src/compile/execute/rustc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ fn record(
.arg("rust.deny-warnings=false")
.arg("--set")
.arg(&format!("build.rustc={}", fake_rustc.to_str().unwrap()))
.env("RUSTC_PERF_REAL_RUSTC", &toolchain.rustc)
.env("RUSTC_PERF_REAL_RUSTC", &toolchain.components.rustc)
.arg("--set")
.arg(&format!(
"build.cargo={}",
toolchain.cargo.to_str().unwrap()
toolchain.components.cargo.to_str().unwrap()
))
.status()
.context("configuring")?;
Expand All @@ -114,7 +114,7 @@ fn record(
.context("x.py script canonicalize")?,
)
.current_dir(checkout)
.env("RUSTC_PERF_REAL_RUSTC", &toolchain.rustc)
.env("RUSTC_PERF_REAL_RUSTC", &toolchain.components.rustc)
.arg("build")
.arg("--stage")
.arg("0")
Expand Down
4 changes: 2 additions & 2 deletions collector/src/runtime/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ fn start_cargo_build(
benchmark_dir: &Path,
target_dir: Option<&Path>,
) -> anyhow::Result<Child> {
let mut command = Command::new(&toolchain.cargo);
let mut command = Command::new(&toolchain.components.cargo);
command
.env("RUSTC", &toolchain.rustc)
.env("RUSTC", &toolchain.components.rustc)
.arg("build")
.arg("--release")
.arg("--message-format")
Expand Down
101 changes: 83 additions & 18 deletions collector/src/toolchain.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::compile::benchmark::profile::Profile;
use anyhow::{anyhow, Context};
use log::debug;
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::{BufReader, Read};
use std::path::{Path, PathBuf};
Expand All @@ -12,9 +13,7 @@ use xz2::bufread::XzDecoder;
/// Sysroot downloaded from CI.
pub struct Sysroot {
pub sha: String,
pub rustc: PathBuf,
pub rustdoc: PathBuf,
pub cargo: PathBuf,
pub components: ToolchainComponents,
pub triple: String,
pub preserve: bool,
}
Expand Down Expand Up @@ -121,10 +120,15 @@ impl SysrootDownload {
})
};

let components = ToolchainComponents::from_binaries_and_libdir(
sysroot_bin("rustc")?,
Some(sysroot_bin("rustdoc")?),
sysroot_bin("cargo")?,
&self.directory.join(&self.rust_sha).join("lib"),
)?;

Ok(Sysroot {
rustc: sysroot_bin("rustc")?,
rustdoc: sysroot_bin("rustdoc")?,
cargo: sysroot_bin("cargo")?,
components,
sha: self.rust_sha,
triple: self.triple,
preserve: false,
Expand Down Expand Up @@ -218,25 +222,72 @@ impl SysrootDownload {
/// Representation of a toolchain that can be used to compile Rust programs.
#[derive(Debug, Clone)]
pub struct Toolchain {
pub rustc: PathBuf,
pub rustdoc: Option<PathBuf>,
pub cargo: PathBuf,
pub components: ToolchainComponents,
pub id: String,
pub triple: String,
}

impl Toolchain {
pub fn from_sysroot(sysroot: &Sysroot, id: String) -> Self {
Self {
rustc: sysroot.rustc.clone(),
rustdoc: Some(sysroot.rustdoc.clone()),
cargo: sysroot.cargo.clone(),
components: sysroot.components.clone(),
id,
triple: sysroot.triple.clone(),
}
}
}

/// Filesystem paths of the individual pieces of a toolchain: the `rustc` and `cargo`
/// binaries, an optional `rustdoc` binary, and optional shared libraries that
/// `fill_libraries` looks up in the toolchain's library directory.
#[derive(Debug, Clone, Default)]
pub struct ToolchainComponents {
pub rustc: PathBuf,
pub rustdoc: Option<PathBuf>,
pub cargo: PathBuf,
// The library fields below start as `None` and are set by `fill_libraries` when a
// regular `.so` file with the matching name prefix is found.
// `.so` file whose name starts with `librustc_driver`.
pub lib_rustc: Option<PathBuf>,
// `.so` file whose name starts with `libstd`.
pub lib_std: Option<PathBuf>,
// `.so` file whose name starts with `libtest`.
pub lib_test: Option<PathBuf>,
// `.so` file whose name starts with `libLLVM`.
pub lib_llvm: Option<PathBuf>,
}

impl ToolchainComponents {
fn from_binaries_and_libdir(
rustc: PathBuf,
rustdoc: Option<PathBuf>,
cargo: PathBuf,
libdir: &Path,
) -> anyhow::Result<Self> {
let mut component = ToolchainComponents {
rustc,
rustdoc,
cargo,
..Default::default()
};
component.fill_libraries(libdir)?;
Ok(component)
}

/// Finds known library components in the given `dir` and stores them in `self`.
fn fill_libraries(&mut self, dir: &Path) -> anyhow::Result<()> {
for entry in fs::read_dir(dir).context("Cannot read lib dir to find components")? {
let entry = entry?;
let path = entry.path();
if path.is_file() && path.extension() == Some(OsStr::new("so")) {
if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
if filename.starts_with("libLLVM") {
self.lib_llvm = Some(path);
} else if filename.starts_with("librustc_driver") {
self.lib_rustc = Some(path);
} else if filename.starts_with("libstd") {
self.lib_std = Some(path);
} else if filename.starts_with("libtest") {
self.lib_test = Some(path);
}
}
}
}
Ok(())
}
}

/// Get a toolchain from the input.
/// - `rustc`: check if the given one is acceptable.
/// - `rustdoc`: if one is given, check if it is acceptable. Otherwise, if
Expand Down Expand Up @@ -374,10 +425,10 @@ pub fn get_local_toolchain(
cargo
};

let lib_dir = get_lib_dir_from_rustc(&rustc).context("Cannot find libdir for rustc")?;

Ok(Toolchain {
rustc,
rustdoc,
cargo,
components: ToolchainComponents::from_binaries_and_libdir(rustc, rustdoc, cargo, &lib_dir)?,
id,
triple: target_triple,
})
Expand Down Expand Up @@ -420,11 +471,25 @@ pub fn create_toolchain_from_published_version(
debug!("Found rustdoc: {}", rustdoc.display());
debug!("Found cargo: {}", cargo.display());

let lib_dir = get_lib_dir_from_rustc(&rustc)?;

let components =
ToolchainComponents::from_binaries_and_libdir(rustc, Some(rustdoc), cargo, &lib_dir)?;

Ok(Toolchain {
rustc,
rustdoc: Some(rustdoc),
cargo,
components,
id: toolchain.to_string(),
triple: target_triple.to_string(),
})
}

fn get_lib_dir_from_rustc(rustc: &Path) -> anyhow::Result<PathBuf> {
let sysroot = Command::new(rustc)
.arg("--print")
.arg("sysroot")
.output()?
.stdout;
let sysroot_path = String::from_utf8_lossy(&sysroot);

Ok(Path::new(sysroot_path.as_ref().trim()).join("lib"))
}
37 changes: 20 additions & 17 deletions database/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,22 @@ id name date type
1 LOCAL_TEST release
```

### artifact size

Records the size of individual components (like `librustc_driver.so` or `libLLVM.so`) of a single
artifact.

Each row of this table includes:
* aid: id of the artifact that the component belongs to
* component: normalized name of the component (hashes are removed)
* size: size of the component in bytes

```
sqlite> select * from artifact_size limit 1;
aid component size
---------- ---------- ----------
1 libLLVM 177892352
```

### collection

A "collection" of benchmarks tied only differing by the statistic collected.
Expand Down Expand Up @@ -267,26 +283,13 @@ bors_sha pr parent_sha complete requested include exclude runs commi
1w0p83... 42 fq24xq... true <timestamp> 3 <timestamp>
```

### error_series

Records a compile-time benchmark that caused an error.

This table exists to avoid duplicating benchmarks many times in the `error` table.

```
sqlite> select * from error_series limit 1;
id crate
---------- -----------
1 hello-world
```

### error

Records a compilation error for an artifact and an entry in `error_series`.
Records a compilation or runtime error for an artifact and a benchmark.

```
sqlite> select * from error limit 1;
series aid error
---------- --- -----
1 42 Failed to compile...
aid benchmark error
---------- --- -----
1 syn-1.0.89 Failed to compile...
```
5 changes: 5 additions & 0 deletions database/src/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ pub trait Connection: Send + Sync {
krate: &str,
value: Duration,
);

/// Records the size of an artifact component (like `librustc_driver.so` or `libLLVM.so`) in
/// bytes.
async fn record_artifact_size(&self, artifact: ArtifactIdNumber, component: &str, size: u64);

/// Returns vector of bootstrap build times for the given artifacts. The kth
/// element is the minimum build time for the kth artifact in `aids`, across
/// all collections for the artifact, or none if there is no bootstrap data
Expand Down
Loading