Merge pull request #1283 from rylev/only-label-when-confident

rylev · web-flow · commit 3ba306e327ab · 2022-04-08T10:12:44.000+02:00
Only label PRs as perf regressions if we're confident enough
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
@@ -337,6 +337,14 @@ impl ComparisonSummary {
     pub fn is_relevant(&self) -> bool {
         !self.is_empty()
     }
+
+    pub fn num_changes(&self) -> usize {
+        self.relevant_comparisons.len()
+    }
+
+    pub fn largest_change(&self) -> Option<&TestResultComparison> {
+        self.relevant_comparisons.first()
+    }
 }
 
 async fn write_triage_summary(
@@ -984,7 +992,7 @@ impl TestResultComparison {
     ///
     /// This is the average of the absolute magnitude of the change
     /// and the amount above the significance threshold.
-    fn magnitude(&self) -> Magnitude {
+    pub fn magnitude(&self) -> Magnitude {
         let change = self.relative_change().abs();
         let threshold = self.significance_threshold();
         let over_threshold = if change < threshold * 1.5 {
@@ -1058,7 +1066,7 @@ impl std::hash::Hash for TestResultComparison {
 }
 
 // The direction of a performance change
-#[derive(PartialEq, Eq, Hash, Debug)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum Direction {
     Improvement,
     Regression,
diff --git a/site/src/github.rs b/site/src/github.rs
@@ -1,5 +1,5 @@
 use crate::api::github::Issue;
-use crate::comparison::{write_summary_table, ComparisonSummary, Direction};
+use crate::comparison::{write_summary_table, ComparisonSummary, Direction, Magnitude};
 use crate::load::{Config, SiteCtxt, TryCommit};
 
 use anyhow::Context as _;
@@ -558,154 +558,154 @@ pub async fn post_finished(ctxt: &SiteCtxt) {
 ///
 /// `is_master_commit` is used to differentiate messages for try runs and post-merge runs.
 async fn post_comparison_comment(ctxt: &SiteCtxt, commit: QueuedCommit, is_master_commit: bool) {
+    let pr = commit.pr;
+    let body = summarize_run(ctxt, commit, is_master_commit).await;
+
+    post_comment(&ctxt.config, pr, body).await;
+}
+
+async fn summarize_run(ctxt: &SiteCtxt, commit: QueuedCommit, is_master_commit: bool) -> String {
     let comparison_url = format!(
         "https://perf.rust-lang.org/compare.html?start={}&end={}",
         commit.parent_sha, commit.sha
     );
-    let (summary, direction) =
-        categorize_benchmark(ctxt, commit.sha.clone(), commit.parent_sha).await;
+    let comparison = match crate::comparison::compare(
+        collector::Bound::Commit(commit.parent_sha),
+        collector::Bound::Commit(commit.sha.clone()),
+        "instructions:u".to_owned(),
+        ctxt,
+    )
+    .await
+    {
+        Ok(Some(c)) => c,
+        _ => return String::from("ERROR categorizing benchmark run!"),
+    };
+
+    let errors = if !comparison.newly_failed_benchmarks.is_empty() {
+        let benchmarks = comparison
+            .newly_failed_benchmarks
+            .iter()
+            .map(|(benchmark, _)| format!("- {benchmark}"))
+            .collect::<Vec<_>>()
+            .join("\n");
+        format!("\n**Warning ⚠**: The following benchmark(s) failed to build:\n{benchmarks}\n")
+    } else {
+        String::new()
+    };
+
+    let benchmark_map = ctxt.get_benchmark_category_map().await;
+    let (primary, secondary) = comparison.summarize_by_category(benchmark_map);
+
+    const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
+    please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
+    let footer = format!("{DISAGREEMENT}{errors}");
+
+    if !primary.is_relevant() && !secondary.is_relevant() {
+        return format!("This benchmark run did not return any relevant results.\n\n{footer}");
+    }
+
+    let primary_short_summary = generate_short_summary(&primary);
+    let secondary_short_summary = generate_short_summary(&secondary);
+
+    let mut summary = format!(
+        r#"
+- Primary benchmarks: {primary_short_summary}
+- Secondary benchmarks: {secondary_short_summary}
+"#
+    );
+    write!(summary, "\n\n").unwrap();
+
+    write_summary_table(&primary, &secondary, true, &mut summary);
 
-    let body = format!(
-        "Finished benchmarking commit ({sha}): [comparison url]({url}).
+    write!(summary, "\n{footer}").unwrap();
+
+    let direction = primary.direction().or(secondary.direction());
+    let next_steps = next_steps(primary, secondary, direction, is_master_commit);
+
+    format!(
+        "Finished benchmarking commit ({sha}): [comparison url]({comparison_url}).
 
 **Summary**: {summary}
-{rest}",
+{next_steps}",
         sha = commit.sha,
-        url = comparison_url,
-        summary = summary,
-        rest = if is_master_commit {
-            master_run_body(direction)
-        } else {
-            try_run_body(direction)
+    )
+}
+
+fn next_steps(
+    primary: ComparisonSummary,
+    secondary: ComparisonSummary,
+    direction: Option<Direction>,
+    is_master_commit: bool,
+) -> String {
+    // whether we are confident enough this is a real change and it deserves to be looked at
+    let deserves_attention = match (primary.largest_change(), secondary.largest_change()) {
+        (Some(c), _) if c.magnitude() >= Magnitude::Medium => true,
+        (_, Some(c)) if c.magnitude() >= Magnitude::Medium => true,
+        _ => {
+            let primary_n = primary.num_changes();
+            let secondary_n = secondary.num_changes();
+            (primary_n * 2 + secondary_n) >= 6
         }
-    );
+    };
+    let label = match (deserves_attention, direction) {
+        (true, Some(Direction::Regression | Direction::Mixed)) => "+perf-regression",
+        _ => "-perf-regression",
+    };
 
-    post_comment(&ctxt.config, commit.pr, body).await;
+    if is_master_commit {
+        master_run_body(label)
+    } else {
+        try_run_body(label)
+    }
 }
 
-fn master_run_body(direction: Option<Direction>) -> String {
-    let label = match direction {
-        Some(Direction::Regression | Direction::Mixed) => "+perf-regression",
-        Some(Direction::Improvement) | None => "-perf-regression",
-    };
-    let next_steps = match direction {
-        Some(Direction::Regression | Direction::Mixed) => {
-            "\n\n**Next Steps**: If you can justify the \
+fn master_run_body(label: &str) -> String {
+    let next_steps = if label.starts_with("+") {
+        "\n\n**Next Steps**: If you can justify the \
                 regressions found in this perf run, please indicate this with \
                 `@rustbot label: +perf-regression-triaged` along with \
                 sufficient written justification. If you cannot justify the regressions \
                 please open an issue or create a new PR that fixes the regressions, \
                 add a comment linking to the newly created issue or PR, \
                 and then add the `perf-regression-triaged` label to this PR."
-        }
-        Some(Direction::Improvement) | None => "",
+    } else {
+        ""
     };
 
     format!(
         "
 {next_steps}
 
 @rustbot label: {label}",
-        next_steps = next_steps,
-        label = label
     )
 }
 
-fn try_run_body(direction: Option<Direction>) -> String {
-    let label = match direction {
-        Some(Direction::Regression | Direction::Mixed) => "+perf-regression",
-        Some(Direction::Improvement) | None => "-perf-regression",
-    };
-    let next_steps = match direction {
-        Some(Direction::Regression | Direction::Mixed) => {
-            "\n\n**Next Steps**: If you can justify the regressions found in \
+fn try_run_body(label: &str) -> String {
+    let next_steps = if label.starts_with("+") {
+        "\n\n**Next Steps**: If you can justify the regressions found in \
             this try perf run, please indicate this with \
             `@rustbot label: +perf-regression-triaged` along with \
             sufficient written justification. If you cannot justify the regressions \
             please fix the regressions and do another perf run. If the next run \
             shows neutral or positive results, the label will be automatically removed."
-        }
-        Some(Direction::Improvement) | None => "",
+    } else {
+        ""
     };
 
     format!(
         "
 Benchmarking this pull request likely means that it is \
 perf-sensitive, so we're automatically marking it as not fit \
 for rolling up. While you can manually mark this PR as fit \
-for rollup, we strongly recommend not doing so since this PR led to changes in \
+for rollup, we strongly recommend not doing so since this PR may lead to changes in \
 compiler perf.{next_steps}
 
 @bors rollup=never
 @rustbot label: +S-waiting-on-review -S-waiting-on-perf {label}",
-        next_steps = next_steps,
-        label = label
     )
 }
 
-async fn categorize_benchmark(
-    ctxt: &SiteCtxt,
-    commit_sha: String,
-    parent_sha: String,
-) -> (String, Option<Direction>) {
-    let comparison = match crate::comparison::compare(
-        collector::Bound::Commit(parent_sha),
-        collector::Bound::Commit(commit_sha),
-        "instructions:u".to_owned(),
-        ctxt,
-    )
-    .await
-    {
-        Ok(Some(c)) => c,
-        _ => return (String::from("ERROR categorizing benchmark run!"), None),
-    };
-
-    let errors = if !comparison.newly_failed_benchmarks.is_empty() {
-        let benchmarks = comparison
-            .newly_failed_benchmarks
-            .iter()
-            .map(|(benchmark, _)| format!("- {benchmark}"))
-            .collect::<Vec<_>>()
-            .join("\n");
-        format!("\n**Warning ⚠**: The following benchmark(s) failed to build:\n{benchmarks}\n")
-    } else {
-        String::new()
-    };
-
-    let benchmark_map = ctxt.get_benchmark_category_map().await;
-    let (primary, secondary) = comparison.summarize_by_category(benchmark_map);
-
-    const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
-    please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
-    let footer = format!("{DISAGREEMENT}{errors}");
-
-    if !primary.is_relevant() && !secondary.is_relevant() {
-        return (
-            format!("This benchmark run did not return any relevant results.\n\n{footer}"),
-            None,
-        );
-    }
-
-    let (primary_short_summary, primary_direction) = generate_short_summary(&primary);
-    let (secondary_short_summary, secondary_direction) = generate_short_summary(&secondary);
-
-    let mut result = format!(
-        r#"
-- Primary benchmarks: {primary_short_summary}
-- Secondary benchmarks: {secondary_short_summary}
-"#
-    );
-    write!(result, "\n\n").unwrap();
-
-    write_summary_table(&primary, &secondary, true, &mut result);
-
-    write!(result, "\n{footer}").unwrap();
-
-    let direction = primary_direction.or(secondary_direction);
-    (result, direction)
-}
-
-fn generate_short_summary(summary: &ComparisonSummary) -> (String, Option<Direction>) {
+fn generate_short_summary(summary: &ComparisonSummary) -> String {
     // Add an "s" to a word unless there's only one.
     fn ending(word: &'static str, count: usize) -> std::borrow::Cow<'static, str> {
         if count == 1 {
@@ -714,25 +714,20 @@ fn generate_short_summary(summary: &ComparisonSummary) -> (String, Option<Direct
         format!("{}s", word).into()
     }
 
-    if summary.is_relevant() {
-        let direction = summary.direction().unwrap();
-        let num_improvements = summary.number_of_improvements();
-        let num_regressions = summary.number_of_regressions();
+    let num_improvements = summary.number_of_improvements();
+    let num_regressions = summary.number_of_regressions();
 
-        let text = match direction {
-            Direction::Improvement => format!(
-                "🎉 relevant {} found",
-                ending("improvement", num_improvements)
-            ),
-            Direction::Regression => format!(
-                "😿 relevant {} found",
-                ending("regression", num_regressions)
-            ),
-            Direction::Mixed => "mixed results".to_string(),
-        };
-        (text, Some(direction))
-    } else {
-        ("no relevant changes found".to_string(), None)
+    match summary.direction() {
+        Some(Direction::Improvement) => format!(
+            "🎉 relevant {} found",
+            ending("improvement", num_improvements)
+        ),
+        Some(Direction::Regression) => format!(
+            "😿 relevant {} found",
+            ending("regression", num_regressions)
+        ),
+        Some(Direction::Mixed) => "mixed results".to_string(),
+        None => "no relevant changes found".to_string(),
     }
 }