Skip to content

Commit 123c4bc

Browse files
committed
Differentiate between primary and secondary benchmarks in summary table
1 parent 9f9a5b0 commit 123c4bc

File tree

2 files changed

+207
-99
lines changed

2 files changed

+207
-99
lines changed

site/src/comparison.rs

Lines changed: 184 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::selector::{self, Tag};
1111
use collector::Bound;
1212
use serde::Serialize;
1313

14+
use database::BenchmarkData;
1415
use std::collections::{HashMap, HashSet};
1516
use std::error::Error;
1617
use std::hash::Hash;
@@ -193,9 +194,9 @@ impl ComparisonSummary {
193194
.cloned()
194195
.collect::<Vec<_>>();
195196
// Skip empty commits, sometimes happens if there's a compiler bug or so.
196-
if comparisons.len() == 0 {
197-
return None;
198-
}
197+
// if comparisons.len() == 0 {
198+
// return None;
199+
// }
199200

200201
let cmp = |b1: &TestResultComparison, b2: &TestResultComparison| {
201202
b2.relative_change()
@@ -421,61 +422,103 @@ impl ComparisonSummary {
421422
.unwrap();
422423
}
423424
}
425+
}
424426

425-
/// Writes a Markdown table containing summary of relevant results.
426-
pub fn write_summary_table(&self, result: &mut String) {
427-
use std::fmt::Write;
427+
pub type BenchmarkMap = HashMap<Benchmark, BenchmarkData>;
428428

429-
fn render_stat<F: FnOnce() -> Option<f64>>(count: usize, calculate: F) -> String {
430-
let value = if count > 0 {
431-
calculate()
432-
} else {
433-
None
434-
};
435-
value.map(|value| format!("{value:.1}%")).unwrap_or_else(|| "N/A".to_string())
436-
}
429+
/// Writes a Markdown table containing summary of relevant results.
430+
pub fn write_summary_table(
431+
primary: &ComparisonSummary,
432+
secondary: &ComparisonSummary,
433+
result: &mut String,
434+
) {
435+
use std::fmt::Write;
437436

438-
writeln!(
439-
result,
440-
r#"| | Regressions 😿 | Improvements 🎉 | All relevant changes |
441-
|:---:|:---:|:---:|:---:|"#
442-
)
443-
.unwrap();
444-
writeln!(
445-
result,
446-
"| count[^1] | {} | {} | {} |",
447-
self.num_regressions,
448-
self.num_improvements,
449-
self.num_regressions + self.num_improvements
450-
)
451-
.unwrap();
452-
writeln!(
453-
result,
454-
"| mean[^2] | {} | {} | {:.1}% |",
455-
render_stat(self.num_regressions, || Some(self.arithmetic_mean_of_regressions())),
456-
render_stat(self.num_improvements, || Some(self.arithmetic_mean_of_improvements())),
457-
self.arithmetic_mean_of_changes()
458-
)
459-
.unwrap();
437+
fn render_stat<F: FnOnce() -> Option<f64>>(count: usize, calculate: F) -> String {
438+
let value = if count > 0 { calculate() } else { None };
439+
value
440+
.map(|value| format!("{value:.1}%"))
441+
.unwrap_or_else(|| "N/A".to_string())
442+
}
443+
444+
writeln!(
445+
result,
446+
r#"| | Regressions 😿 <br> (primary) | Regressions 😿 <br> (secondary) | Improvements 🎉 <br> (primary) | Improvements 🎉 <br> (secondary) | All relevant changes |
447+
|:---:|:---:|:---:|:---:|:---:|:---:|"#
448+
)
449+
.unwrap();
450+
writeln!(
451+
result,
452+
"| count[^1] | {} | {} | {} | {} | {} |",
453+
primary.num_regressions,
454+
secondary.num_regressions,
455+
primary.num_improvements,
456+
secondary.num_improvements,
457+
primary.num_regressions
458+
+ primary.num_improvements
459+
+ secondary.num_regressions
460+
+ secondary.num_improvements
461+
)
462+
.unwrap();
463+
464+
writeln!(
465+
result,
466+
"| mean[^2] | {} | {} | {} | {} | {:.1}% |",
467+
render_stat(primary.num_regressions, || Some(
468+
primary.arithmetic_mean_of_regressions()
469+
)),
470+
render_stat(secondary.num_regressions, || Some(
471+
secondary.arithmetic_mean_of_regressions()
472+
)),
473+
render_stat(primary.num_improvements, || Some(
474+
primary.arithmetic_mean_of_improvements()
475+
)),
476+
render_stat(secondary.num_improvements, || Some(
477+
secondary.arithmetic_mean_of_improvements()
478+
)),
479+
// TODO:
480+
// 1) compute average of averages?
481+
// 2) merge two profiles and compute average?
482+
// 3) pass merged profile as a parameter?
483+
primary.arithmetic_mean_of_changes()
484+
)
485+
.unwrap();
460486

461-
let largest_change = self.most_relevant_changes().iter().fold(0.0, |accum: f64, item| {
487+
// TODO: same question
488+
let largest_change = primary
489+
.most_relevant_changes()
490+
.iter()
491+
.fold(0.0, |accum: f64, item| {
462492
let change = item.map(|v| v.relative_change() * 100.0).unwrap_or(0.0);
463493
accum.max(change)
464494
});
465495

466-
writeln!(
467-
result,
468-
"| max | {} | {} | {:.1}% |",
469-
render_stat(self.num_regressions, || self.largest_regression().map(|r| r.relative_change() * 100.0)),
470-
render_stat(self.num_improvements, || self.largest_improvement().map(|r| r.relative_change() * 100.0)),
471-
largest_change
472-
)
473-
.unwrap();
496+
writeln!(
497+
result,
498+
"| max | {} | {} | {} | {} | {:.1}% |",
499+
render_stat(primary.num_regressions, || primary
500+
.largest_regression()
501+
.map(|r| r.relative_change() * 100.0)),
502+
render_stat(secondary.num_regressions, || secondary
503+
.largest_regression()
504+
.map(|r| r.relative_change() * 100.0)),
505+
render_stat(primary.num_improvements, || primary
506+
.largest_improvement()
507+
.map(|r| r.relative_change() * 100.0)),
508+
render_stat(secondary.num_improvements, || secondary
509+
.largest_improvement()
510+
.map(|r| r.relative_change() * 100.0)),
511+
largest_change
512+
)
513+
.unwrap();
474514

475-
writeln!(result, r#"
515+
writeln!(
516+
result,
517+
r#"
476518
[^1]: *number of relevant changes*
477-
[^2]: *the arithmetic mean of the percent change*"#).unwrap();
478-
}
519+
[^2]: *the arithmetic mean of the percent change*"#
520+
)
521+
.unwrap();
479522
}
480523

481524
/// The amount of confidence we have that a comparison actually represents a real
@@ -1291,83 +1334,130 @@ fn compare_link(start: &ArtifactId, end: &ArtifactId) -> String {
12911334

12921335
#[cfg(test)]
12931336
mod tests {
1294-
use std::collections::HashSet;
1337+
use std::collections::{HashMap, HashSet};
12951338

1296-
use database::{ArtifactId, Profile, Scenario};
1339+
use database::{ArtifactId, BenchmarkData, Profile, Scenario};
12971340

12981341
use crate::comparison::{
1299-
ArtifactDescription, Comparison, ComparisonSummary, TestResultComparison,
1342+
write_summary_table, ArtifactDescription, Comparison, ComparisonSummary,
1343+
TestResultComparison,
13001344
};
13011345

1302-
#[test]
1303-
fn summary_table_only_improvements() {
1304-
let summary = create_summary(vec![(10.0, 5.0), (8.0, 2.0)]);
1305-
check_table(
1306-
summary, r#"
1307-
| | Regressions 😿 | Improvements 🎉 | All relevant changes |
1308-
|:---:|:---:|:---:|:---:|
1309-
| count[^1] | 0 | 2 | 2 |
1310-
| mean[^2] | N/A | -62.5% | -62.5% |
1311-
| max | N/A | -75.0% | 0.0% |
1346+
/*#[test]
1347+
fn summary_table_only_improvements() {
1348+
let summary = create_summary(vec![(10.0, 5.0), (8.0, 2.0)]);
1349+
check_table(
1350+
summary,
1351+
r#"
1352+
| | Regressions 😿 | Improvements 🎉 | All relevant changes |
1353+
|:---:|:---:|:---:|:---:|
1354+
| count[^1] | 0 | 2 | 2 |
1355+
| mean[^2] | N/A | -62.5% | -62.5% |
1356+
| max | N/A | -75.0% | 0.0% |
1357+
1358+
[^1]: *number of relevant changes*
1359+
[^2]: *the arithmetic mean of the percent change*
1360+
"#
1361+
.trim_start(),
1362+
);
1363+
}
13121364
1313-
[^1]: *number of relevant changes*
1314-
[^2]: *the arithmetic mean of the percent change*
1315-
"#.trim_start(),
1316-
);
1317-
}
1365+
#[test]
1366+
fn summary_table_only_regressions() {
1367+
let summary = create_summary(vec![(5.0, 10.0), (1.0, 3.0)]);
1368+
check_table(
1369+
summary,
1370+
r#"
1371+
| | Regressions 😿 | Improvements 🎉 | All relevant changes |
1372+
|:---:|:---:|:---:|:---:|
1373+
| count[^1] | 2 | 0 | 2 |
1374+
| mean[^2] | 150.0% | N/A | 150.0% |
1375+
| max | 200.0% | N/A | 200.0% |
1376+
1377+
[^1]: *number of relevant changes*
1378+
[^2]: *the arithmetic mean of the percent change*
1379+
"#
1380+
.trim_start(),
1381+
);
1382+
}*/
13181383

13191384
#[test]
1320-
fn summary_table_only_regressions() {
1321-
let summary = create_summary(vec![(5.0, 10.0), (1.0, 3.0)]);
1385+
fn summary_table_mixed_primary() {
13221386
check_table(
1323-
summary, r#"
1324-
| | Regressions 😿 | Improvements 🎉 | All relevant changes |
1325-
|:---:|:---:|:---:|:---:|
1326-
| count[^1] | 2 | 0 | 2 |
1327-
| mean[^2] | 150.0% | N/A | 150.0% |
1328-
| max | 200.0% | N/A | 200.0% |
1387+
vec![
1388+
("primary", 10.0, 5.0),
1389+
("primary", 5.0, 10.0),
1390+
("primary", 1.0, 3.0),
1391+
("primary", 4.0, 1.0),
1392+
],
1393+
r#"
1394+
| | Regressions 😿 <br> (primary) | Regressions 😿 <br> (secondary) | Improvements 🎉 <br> (primary) | Improvements 🎉 <br> (secondary) | All relevant changes |
1395+
|:---:|:---:|:---:|:---:|:---:|:---:|
1396+
| count[^1] | 2 | 0 | 2 | 0 | 4 |
1397+
| mean[^2] | 150.0% | N/A | -62.5% | N/A | 43.8% |
1398+
| max | 200.0% | N/A | -75.0% | N/A | 200.0% |
13291399
13301400
[^1]: *number of relevant changes*
13311401
[^2]: *the arithmetic mean of the percent change*
1332-
"#.trim_start(),
1402+
"#
1403+
.trim_start(),
13331404
);
13341405
}
13351406

13361407
#[test]
1337-
fn summary_table_mixed() {
1338-
let summary = create_summary(vec![(10.0, 5.0), (5.0, 10.0), (1.0, 3.0), (4.0, 1.0)]);
1408+
fn summary_table_mixed_primary_secondary() {
13391409
check_table(
1340-
summary, r#"
1341-
| | Regressions 😿 | Improvements 🎉 | All relevant changes |
1342-
|:---:|:---:|:---:|:---:|
1343-
| count[^1] | 2 | 2 | 4 |
1344-
| mean[^2] | 150.0% | -62.5% | 43.8% |
1345-
| max | 200.0% | -75.0% | 200.0% |
1410+
vec![
1411+
("primary", 10.0, 5.0),
1412+
("primary", 5.0, 10.0),
1413+
("secondary", 5.0, 10.0),
1414+
("primary", 1.0, 3.0),
1415+
("secondary", 3.0, 1.0),
1416+
("primary", 4.0, 1.0),
1417+
],
1418+
r#"
1419+
| | Regressions 😿 <br> (primary) | Regressions 😿 <br> (secondary) | Improvements 🎉 <br> (primary) | Improvements 🎉 <br> (secondary) | All relevant changes |
1420+
|:---:|:---:|:---:|:---:|:---:|:---:|
1421+
| count[^1] | 2 | 1 | 2 | 1 | 6 |
1422+
| mean[^2] | 150.0% | 100.0% | -62.5% | -66.7% | 43.8% |
1423+
| max | 200.0% | 100.0% | -75.0% | -66.7% | 200.0% |
13461424
13471425
[^1]: *number of relevant changes*
13481426
[^2]: *the arithmetic mean of the percent change*
1349-
"#.trim_start(),
1427+
"#
1428+
.trim_start(),
13501429
);
13511430
}
13521431

1353-
fn check_table(summary: ComparisonSummary, expected: &str) {
1354-
let mut result = String::new();
1355-
summary.write_summary_table(&mut result);
1356-
assert_eq!(result, expected);
1357-
}
1432+
// (category, before, after)
1433+
fn check_table(values: Vec<(&str, f64, f64)>, expected: &str) {
1434+
let mut primary_statistics = HashSet::new();
1435+
let mut secondary_statistics = HashSet::new();
1436+
for (index, (category, before, after)) in values.into_iter().enumerate() {
1437+
let target = if category == "primary" {
1438+
&mut primary_statistics
1439+
} else {
1440+
&mut secondary_statistics
1441+
};
13581442

1359-
fn create_summary(values: Vec<(f64, f64)>) -> ComparisonSummary {
1360-
let mut statistics = HashSet::new();
1361-
for (index, diff) in values.into_iter().enumerate() {
1362-
statistics.insert(TestResultComparison {
1443+
target.insert(TestResultComparison {
13631444
benchmark: index.to_string().as_str().into(),
13641445
profile: Profile::Check,
13651446
scenario: Scenario::Empty,
13661447
variance: None,
1367-
results: diff,
1448+
results: (before, after),
13681449
});
13691450
}
13701451

1452+
let primary = create_summary(primary_statistics);
1453+
let secondary = create_summary(secondary_statistics);
1454+
1455+
let mut result = String::new();
1456+
write_summary_table(&primary, &secondary, &mut result);
1457+
assert_eq!(result, expected);
1458+
}
1459+
1460+
fn create_summary(statistics: HashSet<TestResultComparison>) -> ComparisonSummary {
13711461
let comparison = Comparison {
13721462
a: ArtifactDescription {
13731463
artifact: ArtifactId::Tag("a".to_string()),

site/src/github.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use crate::api::github::Issue;
2-
use crate::comparison::{ComparisonConfidence, ComparisonSummary, Direction};
2+
use crate::comparison::{
3+
write_summary_table, BenchmarkMap, ComparisonConfidence, ComparisonSummary, Direction,
4+
};
35
use crate::load::{Config, SiteCtxt, TryCommit};
46

57
use anyhow::Context as _;
@@ -648,6 +650,14 @@ compiler perf.{next_steps}
648650
)
649651
}
650652

653+
async fn get_benchmark_map(ctxt: &SiteCtxt) -> BenchmarkMap {
654+
let benchmarks = ctxt.pool.connection().await.get_benchmarks().await;
655+
benchmarks
656+
.into_iter()
657+
.map(|bench| (bench.name.as_str().into(), bench))
658+
.collect()
659+
}
660+
651661
async fn categorize_benchmark(
652662
ctxt: &SiteCtxt,
653663
commit_sha: String,
@@ -710,14 +720,22 @@ async fn categorize_benchmark(
710720
let num_regressions = summary.number_of_regressions();
711721

712722
let short_summary = match direction {
713-
Direction::Improvement => format!("🎉 relevant {} found", ending("improvement", num_improvements)),
714-
Direction::Regression => format!("😿 relevant {} found", ending("regression", num_regressions)),
723+
Direction::Improvement => format!(
724+
"🎉 relevant {} found",
725+
ending("improvement", num_improvements)
726+
),
727+
Direction::Regression => format!(
728+
"😿 relevant {} found",
729+
ending("regression", num_regressions)
730+
),
715731
Direction::Mixed => "mixed results".to_string(),
716732
};
717733

734+
let benchmark_map = get_benchmark_map(ctxt).await;
735+
718736
let mut result = format!("Summary: {short_summary}\n");
719-
summary.write_summary_table(&mut result);
720-
write!(result, "\n{}", DISAGREEMENT).unwrap();
737+
// write_summary_table(&summary, &benchmark_map, &mut result); TODO
738+
// write!(result, "\n{}", DISAGREEMENT).unwrap();
721739
(result, Some(direction))
722740
}
723741

0 commit comments

Comments
 (0)