Skip to content

Commit 42db59e

Browse files
committed
Add ability to use old significance scheme
1 parent 384f918 commit 42db59e

File tree

3 files changed

+70
-33
lines changed

3 files changed

+70
-33
lines changed

site/src/api.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,12 @@ pub mod comparison {
147147
use std::collections::HashMap;
148148

149149
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
150+
#[allow(non_snake_case)]
150151
pub struct Request {
151152
pub start: Bound,
152153
pub end: Bound,
153154
pub stat: String,
155+
pub calcNewSig: Option<bool>,
154156
}
155157

156158
#[derive(Debug, Clone, Serialize)]
@@ -358,9 +360,11 @@ pub mod triage {
358360
use serde::{Deserialize, Serialize};
359361

360362
#[derive(Debug, Clone, Serialize, Deserialize)]
363+
#[allow(non_snake_case)]
361364
pub struct Request {
362365
pub start: Bound,
363366
pub end: Option<Bound>,
367+
pub calcNewSig: Option<bool>,
364368
}
365369

366370
#[derive(Debug, Clone, Serialize, Deserialize)]

site/src/comparison.rs

Lines changed: 65 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pub async fn handle_triage(
4545
"instructions:u".to_owned(),
4646
ctxt,
4747
&master_commits,
48+
body.calcNewSig.unwrap_or(false),
4849
)
4950
.await?
5051
{
@@ -89,10 +90,16 @@ pub async fn handle_compare(
8990
) -> Result<api::comparison::Response, BoxedError> {
9091
let master_commits = collector::master_commits().await?;
9192
let end = body.end;
92-
let comparison =
93-
compare_given_commits(body.start, end.clone(), body.stat, ctxt, &master_commits)
94-
.await?
95-
.ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;
93+
let comparison = compare_given_commits(
94+
body.start,
95+
end.clone(),
96+
body.stat,
97+
ctxt,
98+
&master_commits,
99+
body.calcNewSig.unwrap_or(false),
100+
)
101+
.await?
102+
.ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;
96103

97104
let conn = ctxt.conn().await;
98105
let prev = comparison.prev(&master_commits);
@@ -338,7 +345,7 @@ pub async fn compare(
338345
ctxt: &SiteCtxt,
339346
) -> Result<Option<Comparison>, BoxedError> {
340347
let master_commits = collector::master_commits().await?;
341-
compare_given_commits(start, end, stat, ctxt, &master_commits).await
348+
compare_given_commits(start, end, stat, ctxt, &master_commits, false).await
342349
}
343350

344351
/// Compare two bounds on a given stat
@@ -348,6 +355,7 @@ async fn compare_given_commits(
348355
stat: String,
349356
ctxt: &SiteCtxt,
350357
master_commits: &[collector::MasterCommit],
358+
calc_new_sig: bool,
351359
) -> Result<Option<Comparison>, BoxedError> {
352360
let a = ctxt
353361
.artifact_id_for_bound(start.clone(), true)
@@ -387,6 +395,7 @@ async fn compare_given_commits(
387395
.as_ref()
388396
.and_then(|v| v.data.get(&test_case).cloned()),
389397
results: (a, b),
398+
calc_new_sig,
390399
})
391400
})
392401
.collect();
@@ -650,6 +659,30 @@ impl BenchmarkVariance {
650659
deltas
651660
}
652661

662+
fn upper_fence(&self) -> f64 {
663+
let pcs = self.percent_changes();
664+
fn median(data: &[f64]) -> f64 {
665+
if data.len() % 2 == 0 {
666+
(data[(data.len() - 1) / 2] + data[data.len() / 2]) / 2.0
667+
} else {
668+
data[data.len() / 2]
669+
}
670+
}
671+
672+
let len = pcs.len();
673+
let (h1_end, h2_begin) = if len % 2 == 0 {
674+
(len / 2 - 2, len / 2 + 1)
675+
} else {
676+
(len / 2 - 1, len / 2 + 1)
677+
};
678+
// significance is determined by the upper
679+
// interquartile range fence
680+
let q1 = median(&pcs[..=h1_end]);
681+
let q3 = median(&pcs[h2_begin..]);
682+
let iqr = q3 - q1;
683+
q3 + (iqr * 1.5)
684+
}
685+
653686
fn calculate_description(&mut self) {
654687
self.description = BenchmarkVarianceDescription::Normal;
655688

@@ -689,11 +722,15 @@ impl BenchmarkVariance {
689722
}
690723

691724
/// Whether we can trust this benchmark or not
692-
fn is_dodgy(&self) -> bool {
693-
matches!(
694-
self.description,
695-
BenchmarkVarianceDescription::Noisy | BenchmarkVarianceDescription::HighlyVariable
696-
)
725+
fn is_dodgy(&self, calc_new_sig: bool) -> bool {
726+
if !calc_new_sig {
727+
matches!(
728+
self.description,
729+
BenchmarkVarianceDescription::Noisy | BenchmarkVarianceDescription::HighlyVariable
730+
)
731+
} else {
732+
self.upper_fence() > 0.002
733+
}
697734
}
698735
}
699736

@@ -748,13 +785,18 @@ pub struct TestResultComparison {
748785
scenario: Scenario,
749786
variance: Option<BenchmarkVariance>,
750787
results: (f64, f64),
788+
calc_new_sig: bool,
751789
}
752790

753791
impl TestResultComparison {
754792
/// The amount of relative change considered significant when
755793
/// we cannot determine from historical data
756794
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD: f64 = 0.002;
757795

796+
/// The amount of relative change considered significant when
797+
/// the test case is dodgy
798+
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY: f64 = 0.008;
799+
758800
fn is_regression(&self) -> bool {
759801
let (a, b) = self.results;
760802
b > a
@@ -769,30 +811,17 @@ impl TestResultComparison {
769811
}
770812

771813
fn signifcance_threshold(&self) -> f64 {
772-
if let Some(pcs) = self.variance.as_ref().map(|s| s.percent_changes()) {
773-
fn median(data: &[f64]) -> f64 {
774-
if data.len() % 2 == 0 {
775-
(data[(data.len() - 1) / 2] + data[data.len() / 2]) / 2.0
776-
} else {
777-
data[data.len() / 2]
778-
}
779-
}
780-
781-
let len = pcs.len();
782-
let (h1_end, h2_begin) = if len % 2 == 0 {
783-
(len / 2 - 2, len / 2 + 1)
814+
if !self.calc_new_sig {
815+
if self.is_dodgy() {
816+
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY
784817
} else {
785-
(len / 2 - 1, len / 2 + 1)
786-
};
787-
// significance is determined by the upper
788-
// interquartile range fence
789-
let q1 = median(&pcs[..=h1_end]);
790-
let q3 = median(&pcs[h2_begin..]);
791-
let iqr = q3 - q1;
792-
let upper_fence = q3 + (iqr * 1.5);
793-
upper_fence
818+
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
819+
}
794820
} else {
795-
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
821+
self.variance
822+
.as_ref()
823+
.map(|s| s.upper_fence())
824+
.unwrap_or(Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD)
796825
}
797826
}
798827

@@ -810,6 +839,9 @@ impl TestResultComparison {
810839
} else {
811840
Magnitude::VeryLarge
812841
};
842+
if !self.calc_new_sig {
843+
return over_threshold;
844+
}
813845
let change_magnitude = if change < 0.002 {
814846
Magnitude::VerySmall
815847
} else if change < 0.01 {
@@ -846,7 +878,7 @@ impl TestResultComparison {
846878
fn is_dodgy(&self) -> bool {
847879
self.variance
848880
.as_ref()
849-
.map(|v| v.is_dodgy())
881+
.map(|v| v.is_dodgy(self.calc_new_sig))
850882
.unwrap_or(false)
851883
}
852884

site/static/compare.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
766766
end: "",
767767
stat: "instructions:u",
768768
}, state);
769+
values["calcNewSig"] = values.calcNewSig === 'true';
769770
makeRequest("/get", values).then(function (data) {
770771
app.data = data;
771772
});

0 commit comments

Comments
 (0)