Skip to content

Improve the summary message both in GitHub and in the triage report #1156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 97 additions & 46 deletions site/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,19 +163,36 @@ async fn populate_report(
}
}

/// A summary of a given comparison
///
/// This summary only includes changes that are significant and relevant (as determined by a changes magnitude).
pub struct ComparisonSummary {
/// Significant comparisons of magnitude small and above
/// and ordered by magnitude from largest to smallest
comparisons: Vec<TestResultComparison>,
/// The cached number of comparisons that are improvements
num_improvements: usize,
/// The cached number of comparisons that are regressions
num_regressions: usize,
}

impl ComparisonSummary {
pub fn summarize_comparison(comparison: &Comparison) -> Option<ComparisonSummary> {
let mut num_improvements = 0;
let mut num_regressions = 0;

let mut comparisons = comparison
.statistics
.iter()
.filter(|c| c.is_significant())
.filter(|c| c.magnitude().is_small_or_above())
.inspect(|c| {
if c.is_improvement() {
num_improvements += 1;
} else {
num_regressions += 1
}
})
.cloned()
.collect::<Vec<_>>();
// Skip empty commits, sometimes happens if there's a compiler bug or so.
Expand All @@ -191,18 +208,15 @@ impl ComparisonSummary {
};
comparisons.sort_by(cmp);

Some(ComparisonSummary { comparisons })
}

/// Gets the overall direction and magnitude of the changes
///
/// Returns `None` if there are no relevant changes.
pub fn direction_and_magnitude(&self) -> Option<(Direction, Magnitude)> {
self.direction().zip(self.magnitude())
Some(ComparisonSummary {
comparisons,
num_improvements,
num_regressions,
})
}

/// The direction of the changes
fn direction(&self) -> Option<Direction> {
pub fn direction(&self) -> Option<Direction> {
if self.comparisons.len() == 0 {
return None;
}
Expand Down Expand Up @@ -258,17 +272,18 @@ impl ComparisonSummary {
}
}

/// Get the largest magnitude of any change in the comparison.
///
/// Returns `None` if there are no relevant_changes
fn magnitude(&self) -> Option<Magnitude> {
[self.largest_improvement(), self.largest_regression()]
.iter()
.filter_map(|c| c.map(|c| c.magnitude()))
.max_by(|c1, c2| c1.cmp(c2))
/// The number of improvements that were found to be significant and relevant
pub fn number_of_improvements(&self) -> usize {
self.num_improvements
}

/// The number of regressions that were found to be significant and relevant
pub fn number_of_regressions(&self) -> usize {
self.num_regressions
}

pub fn relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
/// The most relevant changes (based on size)
pub fn most_relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
match self.direction() {
Some(Direction::Improvement) => [self.largest_improvement(), None],
Some(Direction::Regression) => [self.largest_regression(), None],
Expand All @@ -277,6 +292,35 @@ impl ComparisonSummary {
}
}

/// The average improvement as a percent
pub fn average_improvement(&self) -> f64 {
self.average(self.improvements())
}

/// The average regression as a percent
pub fn average_regression(&self) -> f64 {
self.average(self.regressions())
}

fn average<'a>(&'a self, changes: impl Iterator<Item = &'a TestResultComparison>) -> f64 {
let mut count = 0;
let mut sum = 0.0;
for r in changes {
sum += r.relative_change();
count += 1;
}

(sum / count as f64) * 100.0
}

fn improvements(&self) -> impl Iterator<Item = &TestResultComparison> {
self.comparisons.iter().filter(|c| c.is_improvement())
}

fn regressions(&self) -> impl Iterator<Item = &TestResultComparison> {
self.comparisons.iter().filter(|c| c.is_regression())
}

fn largest_improvement(&self) -> Option<&TestResultComparison> {
self.comparisons.iter().find(|s| s.is_improvement())
}
Expand Down Expand Up @@ -322,10 +366,7 @@ impl ComparisonSummary {
let end = &comparison.b.artifact;
let link = &compare_link(start, end);

for change in self.relevant_changes().iter().filter_map(|s| *s) {
write!(result, "- ").unwrap();
change.summary_line(&mut result, Some(link))
}
self.write_summary_lines(&mut result, Some(link));

if !comparison.new_errors.is_empty() {
write!(
Expand All @@ -343,6 +384,30 @@ impl ComparisonSummary {

result
}

pub fn write_summary_lines(&self, result: &mut String, link: Option<&str>) {
use std::fmt::Write;
if self.num_regressions > 1 {
writeln!(
result,
"- Average relevant regression: {:.1}%",
self.average_regression()
)
.unwrap();
}
if self.num_improvements > 1 {
writeln!(
result,
"- Average relevant improvement: {:.1}%",
self.average_improvement()
)
.unwrap();
}
for change in self.most_relevant_changes().iter().filter_map(|s| *s) {
write!(result, "- ").unwrap();
change.summary_line(result, link)
}
}
}

/// The amount of confidence we have that a comparison actually represents a real
Expand Down Expand Up @@ -884,11 +949,12 @@ impl TestResultComparison {
!self.is_regression()
}

fn is_significant(&self) -> bool {
/// Whther the comparison yielded a statistically significant result
pub fn is_significant(&self) -> bool {
self.relative_change().abs() >= self.significance_threshold()
}

// Magnitude of change considered significant
/// Magnitude of change considered significant
fn significance_threshold(&self) -> f64 {
if !self.calc_new_sig {
if self.is_dodgy() {
Expand Down Expand Up @@ -984,24 +1050,19 @@ impl TestResultComparison {

pub fn summary_line(&self, summary: &mut String, link: Option<&str>) {
use std::fmt::Write;
let magnitude = self.magnitude();

let percent = self.relative_change() * 100.0;
write!(
writeln!(
summary,
"{} {} in {}",
magnitude.display_as_title(),
"Largest {} in {}: {:.1}% on `{}` builds of `{} {}`",
self.direction(),
match link {
Some(l) => format!("[instruction counts]({})", l),
None => "instruction counts".into(),
}
)
.unwrap();
writeln!(
summary,
" (up to {:.1}% on `{}` builds of `{} {}`)",
percent, self.scenario, self.benchmark, self.profile
},
percent,
self.scenario,
self.benchmark,
self.profile
)
.unwrap();
}
Expand Down Expand Up @@ -1063,16 +1124,6 @@ impl Magnitude {
*self >= Self::Medium
}

pub fn display_as_title(&self) -> &'static str {
match self {
Self::VerySmall => "Very small",
Self::Small => "Small",
Self::Medium => "Moderate",
Self::Large => "Large",
Self::VeryLarge => "Very large",
}
}

pub fn display(&self) -> &'static str {
match self {
Self::VerySmall => "very small",
Expand Down
90 changes: 63 additions & 27 deletions site/src/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,14 @@ async fn categorize_benchmark(
parent_sha: String,
is_master_commit: bool,
) -> (String, Option<Direction>) {
// Add an "s" to a word unless there's only one.
fn ending(word: &'static str, count: usize) -> std::borrow::Cow<'static, str> {
if count == 1 {
return word.into();
}
format!("{}s", word).into()
}

let comparison = match crate::comparison::compare(
collector::Bound::Commit(parent_sha),
collector::Bound::Commit(commit_sha),
Expand All @@ -667,37 +675,65 @@ async fn categorize_benchmark(
};
const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
let (summary, (direction, magnitude)) =
match ComparisonSummary::summarize_comparison(&comparison) {
Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
let direction_and_magnitude = s.direction_and_magnitude().unwrap();
(s, direction_and_magnitude)
}
_ => {
return (
format!(
"This benchmark run did not return any relevant changes.\n\n{}",
DISAGREEMENT
),
None,
)
}
};
let (summary, direction) = match ComparisonSummary::summarize_comparison(&comparison) {
Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
let direction = s.direction().unwrap();
(s, direction)
}
Some(_) => {
let significant_count = comparison
.statistics
.iter()
.filter(|s| s.is_significant())
.count();
return (
format!(
"This benchmark run did not return any relevant results. {} results were found to be statistically significant but too small to be relevant.\n\n{}",
significant_count,
DISAGREEMENT
),
None,
);
}
None => {
return (
format!(
"This benchmark run did not return any relevant results.\n\n{}",
DISAGREEMENT
),
None,
);
}
};

let category = match direction {
Direction::Improvement => "improvements 🎉",
Direction::Regression => "regressions 😿",
Direction::Mixed => "mixed results 🤷",
let num_improvements = summary.number_of_improvements();
let num_regressions = summary.number_of_regressions();

let description = match direction {
Direction::Improvement => format!(
"{} relevant {} 🎉",
num_improvements,
ending("improvement", num_improvements)
),
Direction::Regression => format!(
"{} relevant {} 😿",
num_regressions,
ending("regression", num_regressions)
),
Direction::Mixed => format!(
"{} relevant {} 🎉 but {} relevant {} 😿",
num_improvements,
ending("improvement", num_improvements),
num_regressions,
ending("regression", num_regressions)
),
};
let mut result = format!(
"This change led to {} relevant {} in compiler performance.\n",
magnitude.display(),
category
"This benchmark run shows {} to instruction counts.\n",
description
);
for change in summary.relevant_changes().iter().filter_map(|c| *c) {
write!(result, "- ").unwrap();
change.summary_line(&mut result, None)
}

summary.write_summary_lines(&mut result, None);
write!(result, "\n{}", DISAGREEMENT).unwrap();
(result, Some(direction))
}
Expand Down