Add ability to use old significance scheme

rylev · rylev · commit 42db59e45605 · 2021-09-09T13:24:12.000+02:00
diff --git a/site/src/api.rs b/site/src/api.rs
@@ -147,10 +147,12 @@ pub mod comparison {
     use std::collections::HashMap;
 
     #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
+    #[allow(non_snake_case)] // `calcNewSig` presumably mirrors the key compare.html sends -- confirm
     pub struct Request {
         pub start: Bound,
         pub end: Bound,
         pub stat: String,
+        pub calcNewSig: Option<bool>, // optional flag; `handle_compare` reads it with `unwrap_or(false)`
     }
 
     #[derive(Debug, Clone, Serialize)]
@@ -358,9 +360,11 @@ pub mod triage {
     use serde::{Deserialize, Serialize};
 
     #[derive(Debug, Clone, Serialize, Deserialize)]
+    #[allow(non_snake_case)] // NOTE(review): camelCase presumably matches the request key -- confirm
     pub struct Request {
         pub start: Bound,
         pub end: Option<Bound>,
+        pub calcNewSig: Option<bool>, // optional flag; `handle_triage` reads it with `unwrap_or(false)`
     }
 
     #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
@@ -45,6 +45,7 @@ pub async fn handle_triage(
             "instructions:u".to_owned(),
             ctxt,
             &master_commits,
+            body.calcNewSig.unwrap_or(false),
         )
         .await?
         {
@@ -89,10 +90,16 @@ pub async fn handle_compare(
 ) -> Result<api::comparison::Response, BoxedError> {
     let master_commits = collector::master_commits().await?;
     let end = body.end;
-    let comparison =
-        compare_given_commits(body.start, end.clone(), body.stat, ctxt, &master_commits)
-            .await?
-            .ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;
+    let comparison = compare_given_commits(
+        body.start,
+        end.clone(),
+        body.stat,
+        ctxt,
+        &master_commits,
+        body.calcNewSig.unwrap_or(false),
+    )
+    .await?
+    .ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;
 
     let conn = ctxt.conn().await;
     let prev = comparison.prev(&master_commits);
@@ -338,7 +345,7 @@ pub async fn compare(
     ctxt: &SiteCtxt,
 ) -> Result<Option<Comparison>, BoxedError> {
     let master_commits = collector::master_commits().await?;
-    compare_given_commits(start, end, stat, ctxt, &master_commits).await
+    compare_given_commits(start, end, stat, ctxt, &master_commits, false).await
 }
 
 /// Compare two bounds on a given stat
@@ -348,6 +355,7 @@ async fn compare_given_commits(
     stat: String,
     ctxt: &SiteCtxt,
     master_commits: &[collector::MasterCommit],
+    calc_new_sig: bool,
 ) -> Result<Option<Comparison>, BoxedError> {
     let a = ctxt
         .artifact_id_for_bound(start.clone(), true)
@@ -387,6 +395,7 @@ async fn compare_given_commits(
                         .as_ref()
                         .and_then(|v| v.data.get(&test_case).cloned()),
                     results: (a, b),
+                    calc_new_sig,
                 })
         })
         .collect();
@@ -650,6 +659,30 @@ impl BenchmarkVariance {
         deltas
     }
 
+    /// Upper interquartile-range fence (`q3 + 1.5 * iqr`) of `percent_changes()`,
+    /// which is presumably sorted ascending. With fewer than three changes the halves
+    /// cannot be formed, so the default threshold is returned instead of panicking.
+    fn upper_fence(&self) -> f64 {
+        let pcs = self.percent_changes();
+        fn median(data: &[f64]) -> f64 {
+            if data.len() % 2 == 0 {
+                (data[(data.len() - 1) / 2] + data[data.len() / 2]) / 2.0
+            } else {
+                data[data.len() / 2]
+            }
+        }
+        let len = pcs.len();
+        if len < 3 {
+            // `len / 2 - 1` (odd) or `len / 2 - 2` (even) would underflow `usize` here.
+            return TestResultComparison::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD;
+        }
+        // Even lengths drop the two middle values, odd lengths the single median.
+        let h1_end = if len % 2 == 0 { len / 2 - 2 } else { len / 2 - 1 };
+        let q1 = median(&pcs[..=h1_end]);
+        let q3 = median(&pcs[len / 2 + 1..]);
+        q3 + ((q3 - q1) * 1.5)
+    }
+
     fn calculate_description(&mut self) {
         self.description = BenchmarkVarianceDescription::Normal;
 
@@ -689,11 +722,15 @@ impl BenchmarkVariance {
     }
 
     /// Whether we can trust this benchmark or not
-    fn is_dodgy(&self) -> bool {
-        matches!(
-            self.description,
-            BenchmarkVarianceDescription::Noisy | BenchmarkVarianceDescription::HighlyVariable
-        )
+    fn is_dodgy(&self, calc_new_sig: bool) -> bool { // `calc_new_sig` picks which check is used
+        if !calc_new_sig { // description-based check, same expression as the removed body
+            matches!(
+                self.description,
+                BenchmarkVarianceDescription::Noisy | BenchmarkVarianceDescription::HighlyVariable
+            )
+        } else {
+            self.upper_fence() > 0.002 // same value as SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
+        }
     }
 }
 
@@ -748,13 +785,18 @@ pub struct TestResultComparison {
     scenario: Scenario,
     variance: Option<BenchmarkVariance>,
     results: (f64, f64),
+    calc_new_sig: bool,
 }
 
 impl TestResultComparison {
     /// The amount of relative change considered significant when
     /// we cannot determine from historical data
     const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD: f64 = 0.002;
 
+    /// The amount of relative change considered significant when
+    /// the test case is dodgy (used only when `calc_new_sig` is false)
+    const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY: f64 = 0.008;
+
     fn is_regression(&self) -> bool {
         let (a, b) = self.results;
         b > a
@@ -769,30 +811,17 @@ impl TestResultComparison {
     }
 
     fn signifcance_threshold(&self) -> f64 {
-        if let Some(pcs) = self.variance.as_ref().map(|s| s.percent_changes()) {
-            fn median(data: &[f64]) -> f64 {
-                if data.len() % 2 == 0 {
-                    (data[(data.len() - 1) / 2] + data[data.len() / 2]) / 2.0
-                } else {
-                    data[data.len() / 2]
-                }
-            }
-
-            let len = pcs.len();
-            let (h1_end, h2_begin) = if len % 2 == 0 {
-                (len / 2 - 2, len / 2 + 1)
+        if !self.calc_new_sig { // fixed thresholds, chosen by whether the test case is dodgy
+            if self.is_dodgy() {
+                Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY
             } else {
-                (len / 2 - 1, len / 2 + 1)
-            };
-            // significance is determined by the upper
-            // interquartile range fence
-            let q1 = median(&pcs[..=h1_end]);
-            let q3 = median(&pcs[h2_begin..]);
-            let iqr = q3 - q1;
-            let upper_fence = q3 + (iqr * 1.5);
-            upper_fence
+                Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
+            }
         } else {
-            Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
+            self.variance
+                .as_ref()
+                .map(|s| s.upper_fence()) // threshold is the variance's upper IQR fence
+                .unwrap_or(Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD) // no variance available
         }
     }
 
@@ -810,6 +839,9 @@ impl TestResultComparison {
         } else {
             Magnitude::VeryLarge
         };
+        if !self.calc_new_sig {
+            return over_threshold;
+        }
         let change_magnitude = if change < 0.002 {
             Magnitude::VerySmall
         } else if change < 0.01 {
@@ -846,7 +878,7 @@ impl TestResultComparison {
     fn is_dodgy(&self) -> bool {
         self.variance
             .as_ref()
-            .map(|v| v.is_dodgy())
+            .map(|v| v.is_dodgy(self.calc_new_sig)) // forwards the scheme flag to the variance check
             .unwrap_or(false)
     }
 
diff --git a/site/static/compare.html b/site/static/compare.html
@@ -766,6 +766,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                 end: "",
                 stat: "instructions:u",
             }, state);
+            values["calcNewSig"] = values.calcNewSig === 'true';
             makeRequest("/get", values).then(function (data) {
                 app.data = data;
             });