Skip to content

Commit f5af99b

Browse files
authored
Merge pull request #1156 from rylev/improve-summary
Improve the summary message both in GitHub and in the triage report
2 parents 601d74e + 176a8a6 commit f5af99b

File tree

2 files changed

+160
-73
lines changed

2 files changed

+160
-73
lines changed

site/src/comparison.rs

Lines changed: 97 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -163,19 +163,36 @@ async fn populate_report(
163163
}
164164
}
165165

166+
/// A summary of a given comparison
167+
///
168+
/// This summary only includes changes that are significant and relevant (as determined by a changes magnitude).
166169
pub struct ComparisonSummary {
167170
/// Significant comparisons of magnitude small and above
168171
/// and ordered by magnitude from largest to smallest
169172
comparisons: Vec<TestResultComparison>,
173+
/// The cached number of comparisons that are improvements
174+
num_improvements: usize,
175+
/// The cached number of comparisons that are regressions
176+
num_regressions: usize,
170177
}
171178

172179
impl ComparisonSummary {
173180
pub fn summarize_comparison(comparison: &Comparison) -> Option<ComparisonSummary> {
181+
let mut num_improvements = 0;
182+
let mut num_regressions = 0;
183+
174184
let mut comparisons = comparison
175185
.statistics
176186
.iter()
177187
.filter(|c| c.is_significant())
178188
.filter(|c| c.magnitude().is_small_or_above())
189+
.inspect(|c| {
190+
if c.is_improvement() {
191+
num_improvements += 1;
192+
} else {
193+
num_regressions += 1
194+
}
195+
})
179196
.cloned()
180197
.collect::<Vec<_>>();
181198
// Skip empty commits, sometimes happens if there's a compiler bug or so.
@@ -191,18 +208,15 @@ impl ComparisonSummary {
191208
};
192209
comparisons.sort_by(cmp);
193210

194-
Some(ComparisonSummary { comparisons })
195-
}
196-
197-
/// Gets the overall direction and magnitude of the changes
198-
///
199-
/// Returns `None` if there are no relevant changes.
200-
pub fn direction_and_magnitude(&self) -> Option<(Direction, Magnitude)> {
201-
self.direction().zip(self.magnitude())
211+
Some(ComparisonSummary {
212+
comparisons,
213+
num_improvements,
214+
num_regressions,
215+
})
202216
}
203217

204218
/// The direction of the changes
205-
fn direction(&self) -> Option<Direction> {
219+
pub fn direction(&self) -> Option<Direction> {
206220
if self.comparisons.len() == 0 {
207221
return None;
208222
}
@@ -258,17 +272,18 @@ impl ComparisonSummary {
258272
}
259273
}
260274

261-
/// Get the largest magnitude of any change in the comparison.
262-
///
263-
/// Returns `None` if there are no relevant_changes
264-
fn magnitude(&self) -> Option<Magnitude> {
265-
[self.largest_improvement(), self.largest_regression()]
266-
.iter()
267-
.filter_map(|c| c.map(|c| c.magnitude()))
268-
.max_by(|c1, c2| c1.cmp(c2))
275+
/// The number of improvements that were found to be significant and relevant
276+
pub fn number_of_improvements(&self) -> usize {
277+
self.num_improvements
278+
}
279+
280+
/// The number of regressions that were found to be significant and relevant
281+
pub fn number_of_regressions(&self) -> usize {
282+
self.num_regressions
269283
}
270284

271-
pub fn relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
285+
/// The most relevant changes (based on size)
286+
pub fn most_relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
272287
match self.direction() {
273288
Some(Direction::Improvement) => [self.largest_improvement(), None],
274289
Some(Direction::Regression) => [self.largest_regression(), None],
@@ -277,6 +292,35 @@ impl ComparisonSummary {
277292
}
278293
}
279294

295+
/// The average improvement as a percent
296+
pub fn average_improvement(&self) -> f64 {
297+
self.average(self.improvements())
298+
}
299+
300+
/// The average regression as a percent
301+
pub fn average_regression(&self) -> f64 {
302+
self.average(self.regressions())
303+
}
304+
305+
fn average<'a>(&'a self, changes: impl Iterator<Item = &'a TestResultComparison>) -> f64 {
306+
let mut count = 0;
307+
let mut sum = 0.0;
308+
for r in changes {
309+
sum += r.relative_change();
310+
count += 1;
311+
}
312+
313+
(sum / count as f64) * 100.0
314+
}
315+
316+
fn improvements(&self) -> impl Iterator<Item = &TestResultComparison> {
317+
self.comparisons.iter().filter(|c| c.is_improvement())
318+
}
319+
320+
fn regressions(&self) -> impl Iterator<Item = &TestResultComparison> {
321+
self.comparisons.iter().filter(|c| c.is_regression())
322+
}
323+
280324
fn largest_improvement(&self) -> Option<&TestResultComparison> {
281325
self.comparisons.iter().find(|s| s.is_improvement())
282326
}
@@ -322,10 +366,7 @@ impl ComparisonSummary {
322366
let end = &comparison.b.artifact;
323367
let link = &compare_link(start, end);
324368

325-
for change in self.relevant_changes().iter().filter_map(|s| *s) {
326-
write!(result, "- ").unwrap();
327-
change.summary_line(&mut result, Some(link))
328-
}
369+
self.write_summary_lines(&mut result, Some(link));
329370

330371
if !comparison.new_errors.is_empty() {
331372
write!(
@@ -343,6 +384,30 @@ impl ComparisonSummary {
343384

344385
result
345386
}
387+
388+
pub fn write_summary_lines(&self, result: &mut String, link: Option<&str>) {
389+
use std::fmt::Write;
390+
if self.num_regressions > 1 {
391+
writeln!(
392+
result,
393+
"- Average relevant regression: {:.1}%",
394+
self.average_regression()
395+
)
396+
.unwrap();
397+
}
398+
if self.num_improvements > 1 {
399+
writeln!(
400+
result,
401+
"- Average relevant improvement: {:.1}%",
402+
self.average_improvement()
403+
)
404+
.unwrap();
405+
}
406+
for change in self.most_relevant_changes().iter().filter_map(|s| *s) {
407+
write!(result, "- ").unwrap();
408+
change.summary_line(result, link)
409+
}
410+
}
346411
}
347412

348413
/// The amount of confidence we have that a comparison actually represents a real
@@ -884,11 +949,12 @@ impl TestResultComparison {
884949
!self.is_regression()
885950
}
886951

887-
fn is_significant(&self) -> bool {
952+
/// Whther the comparison yielded a statistically significant result
953+
pub fn is_significant(&self) -> bool {
888954
self.relative_change().abs() >= self.significance_threshold()
889955
}
890956

891-
// Magnitude of change considered significant
957+
/// Magnitude of change considered significant
892958
fn significance_threshold(&self) -> f64 {
893959
if !self.calc_new_sig {
894960
if self.is_dodgy() {
@@ -984,24 +1050,19 @@ impl TestResultComparison {
9841050

9851051
pub fn summary_line(&self, summary: &mut String, link: Option<&str>) {
9861052
use std::fmt::Write;
987-
let magnitude = self.magnitude();
988-
9891053
let percent = self.relative_change() * 100.0;
990-
write!(
1054+
writeln!(
9911055
summary,
992-
"{} {} in {}",
993-
magnitude.display_as_title(),
1056+
"Largest {} in {}: {:.1}% on `{}` builds of `{} {}`",
9941057
self.direction(),
9951058
match link {
9961059
Some(l) => format!("[instruction counts]({})", l),
9971060
None => "instruction counts".into(),
998-
}
999-
)
1000-
.unwrap();
1001-
writeln!(
1002-
summary,
1003-
" (up to {:.1}% on `{}` builds of `{} {}`)",
1004-
percent, self.scenario, self.benchmark, self.profile
1061+
},
1062+
percent,
1063+
self.scenario,
1064+
self.benchmark,
1065+
self.profile
10051066
)
10061067
.unwrap();
10071068
}
@@ -1063,16 +1124,6 @@ impl Magnitude {
10631124
*self >= Self::Medium
10641125
}
10651126

1066-
pub fn display_as_title(&self) -> &'static str {
1067-
match self {
1068-
Self::VerySmall => "Very small",
1069-
Self::Small => "Small",
1070-
Self::Medium => "Moderate",
1071-
Self::Large => "Large",
1072-
Self::VeryLarge => "Very large",
1073-
}
1074-
}
1075-
10761127
pub fn display(&self) -> &'static str {
10771128
match self {
10781129
Self::VerySmall => "very small",

site/src/github.rs

Lines changed: 63 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,14 @@ async fn categorize_benchmark(
654654
parent_sha: String,
655655
is_master_commit: bool,
656656
) -> (String, Option<Direction>) {
657+
// Add an "s" to a word unless there's only one.
658+
fn ending(word: &'static str, count: usize) -> std::borrow::Cow<'static, str> {
659+
if count == 1 {
660+
return word.into();
661+
}
662+
format!("{}s", word).into()
663+
}
664+
657665
let comparison = match crate::comparison::compare(
658666
collector::Bound::Commit(parent_sha),
659667
collector::Bound::Commit(commit_sha),
@@ -667,37 +675,65 @@ async fn categorize_benchmark(
667675
};
668676
const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
669677
please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
670-
let (summary, (direction, magnitude)) =
671-
match ComparisonSummary::summarize_comparison(&comparison) {
672-
Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
673-
let direction_and_magnitude = s.direction_and_magnitude().unwrap();
674-
(s, direction_and_magnitude)
675-
}
676-
_ => {
677-
return (
678-
format!(
679-
"This benchmark run did not return any relevant changes.\n\n{}",
680-
DISAGREEMENT
681-
),
682-
None,
683-
)
684-
}
685-
};
678+
let (summary, direction) = match ComparisonSummary::summarize_comparison(&comparison) {
679+
Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
680+
let direction = s.direction().unwrap();
681+
(s, direction)
682+
}
683+
Some(_) => {
684+
let significant_count = comparison
685+
.statistics
686+
.iter()
687+
.filter(|s| s.is_significant())
688+
.count();
689+
return (
690+
format!(
691+
"This benchmark run did not return any relevant results. {} results were found to be statistically significant but too small to be relevant.\n\n{}",
692+
significant_count,
693+
DISAGREEMENT
694+
),
695+
None,
696+
);
697+
}
698+
None => {
699+
return (
700+
format!(
701+
"This benchmark run did not return any relevant results.\n\n{}",
702+
DISAGREEMENT
703+
),
704+
None,
705+
);
706+
}
707+
};
686708

687-
let category = match direction {
688-
Direction::Improvement => "improvements 🎉",
689-
Direction::Regression => "regressions 😿",
690-
Direction::Mixed => "mixed results 🤷",
709+
let num_improvements = summary.number_of_improvements();
710+
let num_regressions = summary.number_of_regressions();
711+
712+
let description = match direction {
713+
Direction::Improvement => format!(
714+
"{} relevant {} 🎉",
715+
num_improvements,
716+
ending("improvement", num_improvements)
717+
),
718+
Direction::Regression => format!(
719+
"{} relevant {} 😿",
720+
num_regressions,
721+
ending("regression", num_regressions)
722+
),
723+
Direction::Mixed => format!(
724+
"{} relevant {} 🎉 but {} relevant {} 😿",
725+
num_improvements,
726+
ending("improvement", num_improvements),
727+
num_regressions,
728+
ending("regression", num_regressions)
729+
),
691730
};
692731
let mut result = format!(
693-
"This change led to {} relevant {} in compiler performance.\n",
694-
magnitude.display(),
695-
category
732+
"This benchmark run shows {} to instruction counts.\n",
733+
description
696734
);
697-
for change in summary.relevant_changes().iter().filter_map(|c| *c) {
698-
write!(result, "- ").unwrap();
699-
change.summary_line(&mut result, None)
700-
}
735+
736+
summary.write_summary_lines(&mut result, None);
701737
write!(result, "\n{}", DISAGREEMENT).unwrap();
702738
(result, Some(direction))
703739
}

0 commit comments

Comments
 (0)