Merge pull request #1023 from rylev/filter-by-magnitude

rylev · web-flow · commit 24e25c630d0d · 2021-09-21T20:06:45.000+02:00
Add ability to filter out 'very small' changes
diff --git a/site/src/api.rs b/site/src/api.rs
@@ -201,6 +201,7 @@ pub mod comparison {
         pub is_significant: bool,
         pub significance_factor: Option<f64>,
         pub is_dodgy: bool,
+        pub magnitude: String,
         pub historical_statistics: Option<Vec<f64>>,
         pub statistics: (f64, f64),
     }
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
@@ -115,6 +115,7 @@ pub async fn handle_compare(
             is_dodgy: comparison.is_dodgy(),
             is_significant: comparison.is_significant(),
             significance_factor: comparison.significance_factor(),
+            magnitude: comparison.magnitude().display().to_owned(),
             historical_statistics: comparison.variance.map(|v| v.data),
             statistics: comparison.results,
         })
@@ -149,7 +150,8 @@ async fn populate_report(
 }
 
 pub struct ComparisonSummary {
-    /// Significant comparisons ordered by magnitude from largest to smallest
+    /// Significant comparisons of magnitude small and above,
+    /// ordered by magnitude from largest to smallest
     comparisons: Vec<TestResultComparison>,
 }
 
@@ -159,6 +161,7 @@ impl ComparisonSummary {
             .statistics
             .iter()
             .filter(|c| c.is_significant())
+            .filter(|c| c.magnitude().is_small_or_above())
             .cloned()
             .collect::<Vec<_>>();
         // Skip empty commits, sometimes happens if there's a compiler bug or so.
@@ -269,27 +272,22 @@ impl ComparisonSummary {
     }
 
     pub fn confidence(&self) -> ComparisonConfidence {
-        let mut num_very_small_changes = 0;
         let mut num_small_changes = 0;
         let mut num_medium_changes = 0;
         for c in self.comparisons.iter() {
             match c.magnitude() {
-                Magnitude::VerySmall => num_very_small_changes += 1,
                 Magnitude::Small => num_small_changes += 1,
                 Magnitude::Medium => num_medium_changes += 1,
                 Magnitude::Large => return ComparisonConfidence::DefinitelyRelevant,
                 Magnitude::VeryLarge => return ComparisonConfidence::DefinitelyRelevant,
+                Magnitude::VerySmall => unreachable!(),
             }
         }
 
-        match (
-            num_very_small_changes,
-            num_small_changes,
-            num_medium_changes,
-        ) {
-            (_, _, m) if m > 1 => ComparisonConfidence::DefinitelyRelevant,
-            (_, _, m) if m > 0 => ComparisonConfidence::ProbablyRelevant,
-            (vs, s, _) if (s * 2) + vs > 10 => ComparisonConfidence::ProbablyRelevant,
+        match (num_small_changes, num_medium_changes) {
+            (_, m) if m > 1 => ComparisonConfidence::DefinitelyRelevant,
+            (_, 1) => ComparisonConfidence::ProbablyRelevant,
+            (s, 0) if s > 10 => ComparisonConfidence::ProbablyRelevant,
             _ => ComparisonConfidence::MaybeRelevant,
         }
     }
@@ -866,9 +864,9 @@ impl TestResultComparison {
             Magnitude::VerySmall
         } else if change < threshold * 3.0 {
             Magnitude::Small
-        } else if change < threshold * 10.0 {
+        } else if change < threshold * 6.0 {
             Magnitude::Medium
-        } else if change < threshold * 25.0 {
+        } else if change < threshold * 18.0 {
             Magnitude::Large
         } else {
             Magnitude::VeryLarge
@@ -1002,6 +1000,10 @@ pub enum Magnitude {
 }
 
 impl Magnitude {
+    fn is_small_or_above(&self) -> bool {
+        *self >= Self::Small
+    }
+
     fn is_medium_or_above(&self) -> bool {
         *self >= Self::Medium
     }
diff --git a/site/static/compare.html b/site/static/compare.html
@@ -313,7 +313,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                     <ul id="states-list">
                         <li>
                             <label>
-                                <input type="checkbox" id="build-full" v-model="filter.cache.full" />
+                                <input type="checkbox" id="build-full" v-model="filter.scenario.full" />
                                 <span class="cache-label">full</span>
                             </label>
                             <div class="tooltip">?
@@ -324,7 +324,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                         </li>
                         <li>
                             <label>
-                                <input type="checkbox" id="build-incremental-full" v-model="filter.cache.incrFull" />
+                                <input type="checkbox" id="build-incremental-full" v-model="filter.scenario.incrFull" />
                                 <span class="cache-label">incr-full</span>
                             </label>
                             <div class="tooltip">?
@@ -336,7 +336,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                         <li>
                             <label>
                                 <input type="checkbox" id="build-incremental-unchanged"
-                                    v-model="filter.cache.incrUnchanged" />
+                                    v-model="filter.scenario.incrUnchanged" />
                                 <span class="cache-label">incr-unchanged</span>
                             </label>
                             <div class="tooltip">?
@@ -349,7 +349,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                         <li>
                             <label>
                                 <input type="checkbox" id="build-incremental-patched"
-                                    v-model="filter.cache.incrPatched" />
+                                    v-model="filter.scenario.incrPatched" />
                                 <span class="cache-label">incr-patched</span>
                             </label>
                             <div class="tooltip">?
@@ -374,6 +374,19 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                     </div>
                     <input type="checkbox" v-model="filter.showOnlySignificant" style="margin-left: 20px;" />
                 </div>
+                <div class="section">
+                    <div class="section-heading"><span>Filter out very small changes</span>
+                        <span class="tooltip">?
+                            <span class="tooltiptext">
+                                Whether to filter out test cases that have a very small magnitude. Magnitude is
+                                calculated from both the absolute magnitude (i.e., how large the percentage change is)
+                                and the magnitude of the significance (i.e., by how many times the change was
+                                over the significance threshold).
+                            </span>
+                        </span>
+                    </div>
+                    <input type="checkbox" v-model="filter.filterVerySmall" style="margin-left: 20px;" />
+                </div>
             </div>
         </fieldset>
         <div v-if="data" id="content" style="margin-top: 15px">
@@ -430,30 +443,31 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                 </tbody>
                 <template v-for="bench in benches">
                     <tbody>
-                        <template v-for="run in bench.variants">
+                        <template v-for="testCase in bench.testCases">
                             <tr>
-                                <th v-if="run.first" v-bind:rowspan="bench.variants.length">{{bench.name}}</th>
-                                <td>{{ run.casename }}</td>
+                                <th v-if="testCase.first" v-bind:rowspan="bench.testCases.length">{{bench.name}}</th>
+                                <td>{{ testCase.scenario }}</td>
                                 <td>
-                                    <a v-bind:href="detailedQueryLink(data.a.commit, bench.name, run.casename)">
-                                        {{ run.datumA }}
+                                    <a v-bind:href="detailedQueryLink(data.a.commit, bench.name, testCase.scenario)">
+                                        {{ testCase.datumA }}
                                     </a>
                                 </td>
                                 <td>
-                                    <a v-bind:href="detailedQueryLink(data.b.commit, bench.name, run.casename)">
-                                        {{ run.datumB }}
+                                    <a v-bind:href="detailedQueryLink(data.b.commit, bench.name, testCase.scenario)">
+                                        {{ testCase.datumB }}
                                     </a>
                                 </td>
                                 <td>
                                     <a
-                                        v-bind:href="percentLink(data.b.commit, data.a.commit, bench.name, run.casename)">
-                                        <span v-bind:class="percentClass(run.percent)">
-                                            {{ run.percent.toFixed(2) }}%{{run.isDodgy ? "?" : ""}}
+                                        v-bind:href="percentLink(data.b.commit, data.a.commit, bench.name, testCase.scenario)">
+                                        <span v-bind:class="percentClass(testCase.percent)">
+                                            {{ testCase.percent.toFixed(2) }}%{{testCase.isDodgy ? "?" : ""}}
                                         </span>
                                     </a>
                                 </td>
                                 <td>
-                                    {{ run.significance_factor ? run.significance_factor.toFixed(2) + "x" :"-" }}
+                                    {{ testCase.significanceFactor ? testCase.significanceFactor.toFixed(2) + "x" :"-"
+                                    }}
                                 </td>
                             </tr>
                         </template>
@@ -515,7 +529,8 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                 filter: {
                     name: null,
                     showOnlySignificant: true,
-                    cache: {
+                    filterVerySmall: true,
+                    scenario: {
                         full: true,
                         incrFull: true,
                         incrUnchanged: true,
@@ -541,51 +556,62 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                     let data = this.data;
                     const filter = this.filter;
 
-                    function shouldShowBuild(name) {
-                        if (name === "full") {
-                            return filter.cache.full;
-                        } else if (name === "incr-full") {
-                            return filter.cache.incrFull;
-                        } else if (name === "incr-unchanged") {
-                            return filter.cache.incrUnchanged;
-                        } else if (name.startsWith("incr-patched")) {
-                            return filter.cache.incrPatched;
+                    function scenarioFilter(scenario) {
+                        if (scenario === "full") {
+                            return filter.scenario.full;
+                        } else if (scenario === "incr-full") {
+                            return filter.scenario.incrFull;
+                        } else if (scenario === "incr-unchanged") {
+                            return filter.scenario.incrUnchanged;
+                        } else if (scenario.startsWith("incr-patched")) {
+                            return filter.scenario.incrPatched;
                         } else {
                             // Unknown, but by default we should show things
                             return true;
                         }
                     }
-                    function toVariants(results) {
-                        let variants = [];
+
+                    function shouldShowTestCase(testCase) {
+                        let nameFilter = filter.name && filter.name.trim();
+                        nameFilter = !nameFilter || (testCase.benchmark + "-" + testCase.profile).includes(nameFilter);
+
+                        const significanceFilter = filter.showOnlySignificant ? testCase.isSignificant : true;
+
+                        const magnitudeFilter = filter.filterVerySmall ? testCase.magnitude != "very small" : true;
+
+                        return scenarioFilter(testCase.scenario) && significanceFilter && nameFilter && magnitudeFilter;
+                    }
+
+                    function toTestCases(results) {
+                        let testCases = [];
                         for (let r of results) {
-                            const scenarioName = r.scenario;
+                            const scenario = r.scenario;
                             const datumA = r.statistics[0];
                             const datumB = r.statistics[1];
                             const isSignificant = r.is_significant;
-                            let percent = (100 * (datumB - datumA) / datumA);
-                            let isDodgy = r.is_dodgy;
-                            if (shouldShowBuild(scenarioName)) {
-                                variants.push({
-                                    casename: scenarioName,
-                                    datumA,
-                                    datumB,
-                                    percent,
-                                    isDodgy,
-                                    significance_factor: r.significance_factor,
-                                    isSignificant
-                                });
+                            const significanceFactor = r.significance_factor;
+                            const isDodgy = r.is_dodgy;
+                            let percent = 100 * ((datumB - datumA) / datumA);
+                            let testCase = {
+                                scenario,
+                                datumA,
+                                datumB,
+                                isSignificant,
+                                magnitude: r.magnitude,
+                                significanceFactor,
+                                isDodgy,
+                                percent,
+                            };
+                            if (shouldShowTestCase(testCase)) {
+                                testCases.push(testCase);
                             }
                         }
 
-                        return variants;
+                        return testCases;
                     }
 
                     let benches =
                         data.comparisons.
-                            filter(n => {
-                                const f = filter.name && filter.name.trim();
-                                return !f || (n.benchmark + "-" + n.profile).includes(f);
-                            }).
                             reduce((accum, next) => {
                                 const key = next.benchmark + "-" + next.profile;
                                 if (!accum[key]) {
@@ -598,22 +624,22 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                         map(c => {
                             const name = c[0];
                             const comparison = c[1];
-                            const variants = toVariants(comparison).filter(v => filter.showOnlySignificant ? v.isSignificant : true);
-                            const pcts = variants.map(field => parseFloat(field.percent));
+                            const testCases = toTestCases(comparison);
+                            const pcts = testCases.map(tc => parseFloat(tc.percent));
                             const maxPct = Math.max(...pcts).toFixed(1);
                             const minPct = Math.min(...pcts).toFixed(1);
-                            if (variants.length > 0) {
-                                variants[0].first = true;
+                            if (testCases.length > 0) {
+                                testCases[0].first = true;
                             }
 
                             return {
                                 name,
-                                variants,
+                                testCases,
                                 maxPct,
                                 minPct,
                             };
                         }).
-                        filter(b => b.variants.length > 0);
+                        filter(b => b.testCases.length > 0);
 
                     const largestChange = a => Math.max(Math.abs(a.minPct), Math.abs(a.maxPct));
                     // Sort by name first, so that there is a canonical ordering
@@ -679,7 +705,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                 },
                 summary() {
                     // Create object with each test case that is not filtered out as a key
-                    const filtered = Object.fromEntries(this.benches.flatMap(b => b.variants.map(v => [b.name + "-" + v.casename, true])));
+                    const filtered = Object.fromEntries(this.benches.flatMap(b => b.testCases.map(v => [b.name + "-" + v.scenario, true])));
                     const newCount = { regressions: 0, improvements: 0, unchanged: 0 }
                     let result = { all: { ...newCount }, filtered: { ...newCount } }
                     for (let d of this.data.comparisons) {
@@ -734,11 +760,11 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
                     return klass;
 
                 },
-                detailedQueryLink(commit, bench, run) {
-                    return `/detailed-query.html?commit=${commit}&benchmark=${bench}&run_name=${run}`;
+                detailedQueryLink(commit, bench, testCase) {
+                    return `/detailed-query.html?commit=${commit}&benchmark=${bench}&run_name=${testCase}`;
                 },
-                percentLink(commit, baseCommit, bench, run) {
-                    return `/detailed-query.html?commit=${commit}&base_commit=${baseCommit}&benchmark=${bench}&run_name=${run}`;
+                percentLink(commit, baseCommit, bench, testCase) {
+                    return `/detailed-query.html?commit=${commit}&base_commit=${baseCommit}&benchmark=${bench}&run_name=${testCase}`;
                 },
                 commitLink(commit) {
                     return `https://github.com/rust-lang/rust/commit/${commit}`;
@@ -823,4 +849,4 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
     </script>
 </body>
 
-</html>
+</html>

Original file line number	Diff line number	Diff line change
`@@ -201,6 +201,7 @@ pub mod comparison {`
`201`	`201`	`pub is_significant: bool,`
`202`	`202`	`pub significance_factor: Option<f64>,`
`203`	`203`	`pub is_dodgy: bool,`
	`204`	`+ pub magnitude: String,`
`204`	`205`	`pub historical_statistics: Option<Vec<f64>>,`
`205`	`206`	`pub statistics: (f64, f64),`
`206`	`207`	`}`