Merge pull request #1156 from rylev/improve-summary

rylev · web-flow · commit f5af99bca60c · 2022-01-25T20:14:48.000+01:00
Improve the summary message both in GitHub and in the triage report
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
@@ -163,19 +163,36 @@ async fn populate_report(
     }
 }
 
+/// A summary of a given comparison
+///
+/// This summary only includes changes that are significant and relevant (as determined by a changes magnitude).
 pub struct ComparisonSummary {
     /// Significant comparisons of magnitude small and above
     /// and ordered by magnitude from largest to smallest
     comparisons: Vec<TestResultComparison>,
+    /// The cached number of comparisons that are improvements
+    num_improvements: usize,
+    /// The cached number of comparisons that are regressions
+    num_regressions: usize,
 }
 
 impl ComparisonSummary {
     pub fn summarize_comparison(comparison: &Comparison) -> Option<ComparisonSummary> {
+        let mut num_improvements = 0;
+        let mut num_regressions = 0;
+
         let mut comparisons = comparison
             .statistics
             .iter()
             .filter(|c| c.is_significant())
             .filter(|c| c.magnitude().is_small_or_above())
+            .inspect(|c| {
+                if c.is_improvement() {
+                    num_improvements += 1;
+                } else {
+                    num_regressions += 1
+                }
+            })
             .cloned()
             .collect::<Vec<_>>();
         // Skip empty commits, sometimes happens if there's a compiler bug or so.
@@ -191,18 +208,15 @@ impl ComparisonSummary {
         };
         comparisons.sort_by(cmp);
 
-        Some(ComparisonSummary { comparisons })
-    }
-
-    /// Gets the overall direction and magnitude of the changes
-    ///
-    /// Returns `None` if there are no relevant changes.
-    pub fn direction_and_magnitude(&self) -> Option<(Direction, Magnitude)> {
-        self.direction().zip(self.magnitude())
+        Some(ComparisonSummary {
+            comparisons,
+            num_improvements,
+            num_regressions,
+        })
     }
 
     /// The direction of the changes
-    fn direction(&self) -> Option<Direction> {
+    pub fn direction(&self) -> Option<Direction> {
         if self.comparisons.len() == 0 {
             return None;
         }
@@ -258,17 +272,18 @@ impl ComparisonSummary {
         }
     }
 
-    /// Get the largest magnitude of any change in the comparison.
-    ///
-    /// Returns `None` if there are no relevant_changes
-    fn magnitude(&self) -> Option<Magnitude> {
-        [self.largest_improvement(), self.largest_regression()]
-            .iter()
-            .filter_map(|c| c.map(|c| c.magnitude()))
-            .max_by(|c1, c2| c1.cmp(c2))
+    /// The number of improvements that were found to be significant and relevant
+    pub fn number_of_improvements(&self) -> usize {
+        self.num_improvements
+    }
+
+    /// The number of regressions that were found to be significant and relevant
+    pub fn number_of_regressions(&self) -> usize {
+        self.num_regressions
     }
 
-    pub fn relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
+    /// The most relevant changes (based on size)
+    pub fn most_relevant_changes<'a>(&'a self) -> [Option<&TestResultComparison>; 2] {
         match self.direction() {
             Some(Direction::Improvement) => [self.largest_improvement(), None],
             Some(Direction::Regression) => [self.largest_regression(), None],
@@ -277,6 +292,35 @@ impl ComparisonSummary {
         }
     }
 
+    /// The average improvement as a percent
+    pub fn average_improvement(&self) -> f64 {
+        self.average(self.improvements())
+    }
+
+    /// The average regression as a percent
+    pub fn average_regression(&self) -> f64 {
+        self.average(self.regressions())
+    }
+
+    fn average<'a>(&'a self, changes: impl Iterator<Item = &'a TestResultComparison>) -> f64 {
+        let mut count = 0;
+        let mut sum = 0.0;
+        for r in changes {
+            sum += r.relative_change();
+            count += 1;
+        }
+
+        (sum / count as f64) * 100.0
+    }
+
+    fn improvements(&self) -> impl Iterator<Item = &TestResultComparison> {
+        self.comparisons.iter().filter(|c| c.is_improvement())
+    }
+
+    fn regressions(&self) -> impl Iterator<Item = &TestResultComparison> {
+        self.comparisons.iter().filter(|c| c.is_regression())
+    }
+
     fn largest_improvement(&self) -> Option<&TestResultComparison> {
         self.comparisons.iter().find(|s| s.is_improvement())
     }
@@ -322,10 +366,7 @@ impl ComparisonSummary {
         let end = &comparison.b.artifact;
         let link = &compare_link(start, end);
 
-        for change in self.relevant_changes().iter().filter_map(|s| *s) {
-            write!(result, "- ").unwrap();
-            change.summary_line(&mut result, Some(link))
-        }
+        self.write_summary_lines(&mut result, Some(link));
 
         if !comparison.new_errors.is_empty() {
             write!(
@@ -343,6 +384,30 @@ impl ComparisonSummary {
 
         result
     }
+
+    pub fn write_summary_lines(&self, result: &mut String, link: Option<&str>) {
+        use std::fmt::Write;
+        if self.num_regressions > 1 {
+            writeln!(
+                result,
+                "- Average relevant regression: {:.1}%",
+                self.average_regression()
+            )
+            .unwrap();
+        }
+        if self.num_improvements > 1 {
+            writeln!(
+                result,
+                "- Average relevant improvement: {:.1}%",
+                self.average_improvement()
+            )
+            .unwrap();
+        }
+        for change in self.most_relevant_changes().iter().filter_map(|s| *s) {
+            write!(result, "- ").unwrap();
+            change.summary_line(result, link)
+        }
+    }
 }
 
 /// The amount of confidence we have that a comparison actually represents a real
@@ -884,11 +949,12 @@ impl TestResultComparison {
         !self.is_regression()
     }
 
-    fn is_significant(&self) -> bool {
+    /// Whther the comparison yielded a statistically significant result
+    pub fn is_significant(&self) -> bool {
         self.relative_change().abs() >= self.significance_threshold()
     }
 
-    // Magnitude of change considered significant
+    /// Magnitude of change considered significant
     fn significance_threshold(&self) -> f64 {
         if !self.calc_new_sig {
             if self.is_dodgy() {
@@ -984,24 +1050,19 @@ impl TestResultComparison {
 
     pub fn summary_line(&self, summary: &mut String, link: Option<&str>) {
         use std::fmt::Write;
-        let magnitude = self.magnitude();
-
         let percent = self.relative_change() * 100.0;
-        write!(
+        writeln!(
             summary,
-            "{} {} in {}",
-            magnitude.display_as_title(),
+            "Largest {} in {}: {:.1}% on `{}` builds of `{} {}`",
             self.direction(),
             match link {
                 Some(l) => format!("[instruction counts]({})", l),
                 None => "instruction counts".into(),
-            }
-        )
-        .unwrap();
-        writeln!(
-            summary,
-            " (up to {:.1}% on `{}` builds of `{} {}`)",
-            percent, self.scenario, self.benchmark, self.profile
+            },
+            percent,
+            self.scenario,
+            self.benchmark,
+            self.profile
         )
         .unwrap();
     }
@@ -1063,16 +1124,6 @@ impl Magnitude {
         *self >= Self::Medium
     }
 
-    pub fn display_as_title(&self) -> &'static str {
-        match self {
-            Self::VerySmall => "Very small",
-            Self::Small => "Small",
-            Self::Medium => "Moderate",
-            Self::Large => "Large",
-            Self::VeryLarge => "Very large",
-        }
-    }
-
     pub fn display(&self) -> &'static str {
         match self {
             Self::VerySmall => "very small",
diff --git a/site/src/github.rs b/site/src/github.rs
@@ -654,6 +654,14 @@ async fn categorize_benchmark(
     parent_sha: String,
     is_master_commit: bool,
 ) -> (String, Option<Direction>) {
+    // Add an "s" to a word unless there's only one.
+    fn ending(word: &'static str, count: usize) -> std::borrow::Cow<'static, str> {
+        if count == 1 {
+            return word.into();
+        }
+        format!("{}s", word).into()
+    }
+
     let comparison = match crate::comparison::compare(
         collector::Bound::Commit(parent_sha),
         collector::Bound::Commit(commit_sha),
@@ -667,37 +675,65 @@ async fn categorize_benchmark(
     };
     const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
     please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
-    let (summary, (direction, magnitude)) =
-        match ComparisonSummary::summarize_comparison(&comparison) {
-            Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
-                let direction_and_magnitude = s.direction_and_magnitude().unwrap();
-                (s, direction_and_magnitude)
-            }
-            _ => {
-                return (
-                    format!(
-                        "This benchmark run did not return any relevant changes.\n\n{}",
-                        DISAGREEMENT
-                    ),
-                    None,
-                )
-            }
-        };
+    let (summary, direction) = match ComparisonSummary::summarize_comparison(&comparison) {
+        Some(s) if comparison_is_relevant(s.confidence(), is_master_commit) => {
+            let direction = s.direction().unwrap();
+            (s, direction)
+        }
+        Some(_) => {
+            let significant_count = comparison
+                .statistics
+                .iter()
+                .filter(|s| s.is_significant())
+                .count();
+            return (
+                format!(
+                    "This benchmark run did not return any relevant results. {} results were found to be statistically significant but too small to be relevant.\n\n{}",
+                    significant_count,
+                    DISAGREEMENT
+                ),
+                None,
+            );
+        }
+        None => {
+            return (
+                format!(
+                    "This benchmark run did not return any relevant results.\n\n{}",
+                    DISAGREEMENT
+                ),
+                None,
+            );
+        }
+    };
 
-    let category = match direction {
-        Direction::Improvement => "improvements 🎉",
-        Direction::Regression => "regressions 😿",
-        Direction::Mixed => "mixed results 🤷",
+    let num_improvements = summary.number_of_improvements();
+    let num_regressions = summary.number_of_regressions();
+
+    let description = match direction {
+        Direction::Improvement => format!(
+            "{} relevant {} 🎉",
+            num_improvements,
+            ending("improvement", num_improvements)
+        ),
+        Direction::Regression => format!(
+            "{} relevant {} 😿",
+            num_regressions,
+            ending("regression", num_regressions)
+        ),
+        Direction::Mixed => format!(
+            "{} relevant {} 🎉 but {} relevant {} 😿",
+            num_improvements,
+            ending("improvement", num_improvements),
+            num_regressions,
+            ending("regression", num_regressions)
+        ),
     };
     let mut result = format!(
-        "This change led to {} relevant {} in compiler performance.\n",
-        magnitude.display(),
-        category
+        "This benchmark run shows {} to instruction counts.\n",
+        description
     );
-    for change in summary.relevant_changes().iter().filter_map(|c| *c) {
-        write!(result, "- ").unwrap();
-        change.summary_line(&mut result, None)
-    }
+
+    summary.write_summary_lines(&mut result, None);
     write!(result, "\n{}", DISAGREEMENT).unwrap();
     (result, Some(direction))
 }