Skip to content

Simplify handling of self-profile data in collector #1781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions collector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ object = "0.32.1"
tabled = { version = "0.14.0" , features = ["ansi-str"]}
humansize = "2.1.3"
regex = "1.7.1"
analyzeme = { git = "https://github.com/rust-lang/measureme", branch = "stable" }

benchlib = { path = "benchlib" }

Expand Down
3 changes: 1 addition & 2 deletions collector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ The following options alter the behaviour of the `bench_local` subcommand.
choices are one or more (comma-separated) of `Llvm`, `Cranelift`. The default
is `Llvm`.
- `--self-profile`: use rustc's `-Zself-profile` option to produce
query/function tables in the output. The `measureme` tool must be installed
for this to work.
query/function tables in the output.

`RUST_LOG=debug` can be specified to enable verbose logging, which is useful
for debugging `collector` itself.
Expand Down
3 changes: 0 additions & 3 deletions collector/collect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ while : ; do
rustup update
cargo +nightly build --release -p collector

# Install measureme tooling
cargo install --git https://github.com/rust-lang/measureme --branch stable flamegraph crox summarize

target/release/collector bench_next $SITE_URL --self-profile --bench-rustc --db $DATABASE
STATUS=$?
echo finished run at `date` with exit code $STATUS
Expand Down
6 changes: 0 additions & 6 deletions collector/src/bin/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1593,12 +1593,6 @@ fn bench_compile(
shared.artifact_id, shared.toolchain.triple
);

if config.is_self_profile {
if let Err(e) = check_measureme_installed() {
panic!("{}Or omit --self-profile` to opt out\n", e);
}
}

let bench_rustc = config.bench_rustc;

let start = Instant::now();
Expand Down
76 changes: 2 additions & 74 deletions collector/src/bin/rustc-fake.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use anyhow::Context;
use std::env;
use std::ffi::OsString;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::time::{Duration, Instant};
Expand Down Expand Up @@ -436,55 +434,8 @@ fn process_self_profile_output(prof_out_dir: PathBuf, args: &[OsString]) {
.find(|args| args[0] == "--crate-name")
.and_then(|args| args[1].to_str())
.expect("rustc to be invoked with crate name");
let mut prefix = None;
let mut full_path = None;
// We don't know the pid of rustc, and can't easily get it -- we only know the
// `perf` pid. So just blindly look in the directory to hopefully find it.
for entry in fs::read_dir(&prof_out_dir).unwrap() {
let entry = entry.unwrap();
if entry
.file_name()
.to_str()
.map_or(false, |s| s.starts_with(crate_name))
{
if entry.file_name().to_str().unwrap().ends_with("mm_profdata") {
full_path = Some(entry.path());
break;
}
let file = entry.file_name().to_str().unwrap().to_owned();
let new_prefix = Some(file[..file.find('.').unwrap()].to_owned());
assert!(
prefix.is_none() || prefix == new_prefix,
"prefix={:?}, new_prefix={:?}",
prefix,
new_prefix
);
prefix = new_prefix;
}
}
if let Some(profile_data) = full_path {
// measureme 0.8 has a single file
println!("!self-profile-file:{}", profile_data.to_str().unwrap());
let filename = profile_data.file_name().unwrap().to_str().unwrap();
let json = match run_summarize("summarize", &prof_out_dir, filename) {
Ok(s) => s,
Err(e1) => match run_summarize("summarize-9.0", &prof_out_dir, filename) {
Ok(s) => s,
Err(e2) => {
panic!("failed to run summarize and summarize-9.0. Errors:\nsummarize: {:?}\nsummarize-9.0: {:?}", e1, e2);
}
},
};
println!("!self-profile-output:{}", json);
} else {
let prefix = prefix.unwrap_or_else(|| panic!("found prefix {:?}", prof_out_dir));
let json = run_summarize("summarize", &prof_out_dir, &prefix)
.or_else(|_| run_summarize("summarize-0.7", &prof_out_dir, &prefix))
.expect("able to run summarize or summarize-0.7");
println!("!self-profile-dir:{}", prof_out_dir.to_str().unwrap());
println!("!self-profile-prefix:{}", prefix);
println!("!self-profile-output:{}", json);
}
println!("!self-profile-dir:{}", prof_out_dir.to_str().unwrap());
println!("!self-profile-crate:{}", crate_name);
}

#[cfg(windows)]
Expand Down Expand Up @@ -532,28 +483,5 @@ fn print_time(dur: Duration) {
);
}

/// Runs the external `name` binary as `<name> summarize --json <prefix>` with
/// `prof_out_dir` as its working directory, then returns the contents of the
/// resulting JSON file.
///
/// The JSON file is looked up as `<prefix>.json` inside `prof_out_dir`, where a
/// trailing `.mm_profdata` is first stripped from `prefix` if present
/// (presumably this is where the summarize tool writes its output).
///
/// # Errors
///
/// Fails if the command cannot be spawned, if it exits unsuccessfully, or if
/// the JSON file cannot be read.
fn run_summarize(name: &str, prof_out_dir: &Path, prefix: &str) -> anyhow::Result<String> {
    let exit_status = Command::new(name)
        .current_dir(prof_out_dir)
        .arg("summarize")
        .arg("--json")
        .arg(prefix)
        .status()
        .with_context(|| format!("Command::new({}).status() failed", name))?;

    if !exit_status.success() {
        anyhow::bail!(
            "failed to run {} in {:?} with prefix {:?}",
            name,
            prof_out_dir,
            prefix
        )
    }

    // Single-file profiles are passed in with their `.mm_profdata` extension;
    // the JSON output is named after the stem only.
    let stem = prefix.strip_suffix(".mm_profdata").unwrap_or(prefix);
    let json_path = prof_out_dir.join(format!("{}.json", stem));
    fs::read_to_string(&json_path).with_context(|| format!("failed to read {:?}", json_path))
}

/// Windows-only variant of `print_memory`: the body is empty, so calling it
/// on Windows does nothing.
#[cfg(windows)]
fn print_memory() {}
43 changes: 3 additions & 40 deletions collector/src/compile/execute/bencher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use futures::{future, StreamExt};
use std::collections::VecDeque;
use std::future::Future;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::pin::Pin;
use std::process::Command;
use std::{env, process};
Expand Down Expand Up @@ -214,7 +214,8 @@ impl<'a> Processor for BenchProcessor<'a> {
}
Err(
e @ (DeserializeStatError::ParseError { .. }
| DeserializeStatError::XperfError(..)),
| DeserializeStatError::XperfError(..)
| DeserializeStatError::IOError(..)),
) => {
panic!("process_perf_stat_output failed: {:?}", e);
}
Expand Down Expand Up @@ -281,44 +282,6 @@ impl SelfProfileS3Upload {
.context("create temporary file")
.unwrap();
let filename = match files {
SelfProfileFiles::Seven {
string_index,
string_data,
events,
} => {
let tarball = snap::write::FrameEncoder::new(Vec::new());
let mut builder = tar::Builder::new(tarball);
builder.mode(tar::HeaderMode::Deterministic);

let append_file = |builder: &mut tar::Builder<_>,
file: &Path,
name: &str|
-> anyhow::Result<()> {
if file.exists() {
// Silently ignore missing files, the new self-profile
// experiment with one file has a different structure.
builder.append_path_with_name(file, name)?;
}
Ok(())
};

append_file(&mut builder, &string_index, "self-profile.string_index")
.expect("append string index");
append_file(&mut builder, &string_data, "self-profile.string_data")
.expect("append string data");
append_file(&mut builder, &events, "self-profile.events").expect("append events");
builder.finish().expect("complete tarball");
std::fs::write(
upload.path(),
builder
.into_inner()
.expect("get")
.into_inner()
.expect("snap success"),
)
.expect("wrote tarball");
format!("self-profile-{}.tar.sz", collection)
}
SelfProfileFiles::Eight { file } => {
let data = std::fs::read(file).expect("read profile data");
let mut data = snap::read::FrameEncoder::new(&data[..]);
Expand Down
124 changes: 52 additions & 72 deletions collector/src/compile/execute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ use crate::compile::benchmark::BenchmarkName;
use crate::toolchain::Toolchain;
use crate::utils::fs::EnsureImmutableFile;
use crate::{async_command_output, command_output, utils};
use analyzeme::ArtifactSize;
use anyhow::Context;
use bencher::Bencher;
use database::QueryLabel;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::future::Future;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::process::{self, Command};
use std::str;
use std::time::Duration;

pub mod bencher;
mod etw_parser;
Expand Down Expand Up @@ -468,7 +468,7 @@ fn store_artifact_sizes_into_stats(stats: &mut Stats, profile: &SelfProfile) {
for artifact in profile.artifact_sizes.iter() {
stats
.stats
.insert(format!("size:{}", artifact.label), artifact.size as f64);
.insert(format!("size:{}", artifact.label), artifact.value as f64);
}
}

Expand All @@ -480,17 +480,12 @@ enum DeserializeStatError {
ParseError(String, #[source] ::std::num::ParseFloatError),
#[error("could not process xperf data")]
XperfError(#[from] anyhow::Error),
#[error("io error")]
IOError(#[from] std::io::Error),
}

enum SelfProfileFiles {
Seven {
string_data: PathBuf,
string_index: PathBuf,
events: PathBuf,
},
Eight {
file: PathBuf,
},
Eight { file: PathBuf },
}

fn process_stat_output(
Expand All @@ -499,25 +494,15 @@ fn process_stat_output(
let stdout = String::from_utf8(output.stdout.clone()).expect("utf8 output");
let mut stats = Stats::new();

let mut profile: Option<SelfProfile> = None;
let mut dir: Option<PathBuf> = None;
let mut prefix: Option<String> = None;
let mut file: Option<PathBuf> = None;
let mut self_profile_dir: Option<PathBuf> = None;
let mut self_profile_crate: Option<String> = None;
for line in stdout.lines() {
if let Some(stripped) = line.strip_prefix("!self-profile-output:") {
profile = Some(serde_json::from_str(stripped).unwrap());
continue;
}
if let Some(stripped) = line.strip_prefix("!self-profile-dir:") {
dir = Some(PathBuf::from(stripped));
continue;
}
if let Some(stripped) = line.strip_prefix("!self-profile-prefix:") {
prefix = Some(String::from(stripped));
self_profile_dir = Some(PathBuf::from(stripped));
continue;
}
if let Some(stripped) = line.strip_prefix("!self-profile-file:") {
file = Some(PathBuf::from(stripped));
if let Some(stripped) = line.strip_prefix("!self-profile-crate:") {
self_profile_crate = Some(String::from(stripped));
continue;
}
if let Some(counter_file) = line.strip_prefix("!counters-file:") {
Expand Down Expand Up @@ -581,39 +566,13 @@ fn process_stat_output(
);
}

let files = if let (Some(prefix), Some(dir)) = (prefix, dir) {
let mut string_index = PathBuf::new();
let mut string_data = PathBuf::new();
let mut events = PathBuf::new();
for entry in fs::read_dir(&dir).unwrap() {
let filename = entry.unwrap().file_name();
let filename_str = filename.to_str().unwrap();
let path = dir.join(filename_str);
if filename_str.ends_with(".events") {
assert!(filename_str.contains(&prefix), "{:?}", path);
events = path;
} else if filename_str.ends_with(".string_data") {
assert!(filename_str.contains(&prefix), "{:?}", path);
string_data = path;
} else if filename_str.ends_with(".string_index") {
assert!(filename_str.contains(&prefix), "{:?}", path);
string_index = path;
}
}

Some(SelfProfileFiles::Seven {
string_index,
string_data,
events,
})
} else {
file.map(|file| SelfProfileFiles::Eight { file })
};

if stats.is_empty() {
return Err(DeserializeStatError::NoOutput(output));
}

let (profile, files) = match (self_profile_dir, self_profile_crate) {
(Some(dir), Some(krate)) => parse_self_profile(dir, krate)?,
_ => (None, None),
};
Ok((stats, profile, files))
}

Expand Down Expand Up @@ -650,23 +609,44 @@ impl Stats {

#[derive(serde::Deserialize, Clone)]
pub struct SelfProfile {
pub query_data: Vec<QueryData>,
pub artifact_sizes: Vec<ArtifactSize>,
}

#[derive(serde::Deserialize, Clone)]
pub struct ArtifactSize {
pub label: QueryLabel,
#[serde(rename = "value")]
pub size: u64,
}

#[derive(serde::Deserialize, Clone)]
pub struct QueryData {
pub label: QueryLabel,
pub self_time: Duration,
pub number_of_cache_hits: u32,
pub invocation_count: u32,
pub blocked_time: Duration,
pub incremental_load_time: Duration,
/// Locates and analyzes the self-profile data for `crate_name` inside `dir`.
///
/// The directory is scanned for the first entry whose file name starts with
/// `crate_name` and ends with `mm_profdata`. If one is found, it is read and
/// analyzed with `analyzeme`, and the function returns the extracted artifact
/// sizes together with the path of the profile file. If no such entry exists,
/// `(None, None)` is returned.
///
/// # Errors
///
/// Returns an I/O error if the directory or the profile file cannot be read,
/// or an `ErrorKind::InvalidData` error (after logging to stderr) if
/// `analyzeme` cannot parse the profile data.
fn parse_self_profile(
    dir: PathBuf,
    crate_name: String,
) -> std::io::Result<(Option<SelfProfile>, Option<SelfProfileFiles>)> {
    // We don't know the pid of rustc, and can't easily get it -- we only know the
    // `perf` pid. So scan the directory and take the first `.mm_profdata` file
    // that belongs to the crate.
    let mut found = None;
    for candidate in fs::read_dir(dir)? {
        let candidate = candidate?;
        let file_name = candidate.file_name();
        let is_profile = match file_name.to_str() {
            Some(name) => name.starts_with(crate_name.as_str()) && name.ends_with("mm_profdata"),
            // Non-UTF-8 file names can never match the crate name.
            None => false,
        };
        if is_profile {
            found = Some(candidate.path());
            break;
        }
    }

    let profile_path = match found {
        Some(path) => path,
        // The old "3 files format" is not supported by analyzeme anymore, so we
        // don't handle it here.
        None => return Ok((None, None)),
    };

    // measureme 0.8+ uses a single file
    let raw = fs::read(&profile_path)?;
    let profiling_data = match analyzeme::ProfilingData::from_paged_buffer(raw, None) {
        Ok(parsed) => parsed,
        Err(error) => {
            eprintln!("Cannot read self-profile data: {error:?}");
            return Err(std::io::Error::new(ErrorKind::InvalidData, error));
        }
    };
    let analysis = profiling_data.perform_analysis();

    Ok((
        Some(SelfProfile {
            artifact_sizes: analysis.artifact_sizes,
        }),
        Some(SelfProfileFiles::Eight { file: profile_path }),
    ))
}