Skip to content

[benchmark] Report Quantiles from Benchmark_O and a TON of Gardening #19097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Sep 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e6cff27
[benchmark] Gardening: Indentation of .listTests
palimondo Aug 29, 2018
b373132
[benchmark] Gardening: Code format class Timer
palimondo Aug 29, 2018
c1a694d
[benchmark] Gardening: Extract constant oneSecond
palimondo Aug 29, 2018
974994c
[benchmark] Gardening: Timer Parasite Control
palimondo Aug 29, 2018
963995f
[benchmark] Refactor mean and stdev computation
palimondo Aug 29, 2018
5cd9f53
[benchmark] Refactor min max median computation
palimondo Aug 30, 2018
0db20fe
[benchmark] Fix index computation for quantiles
palimondo Aug 30, 2018
aa4b849
[benchmark] Move stats computation to BenchResults
palimondo Aug 30, 2018
77dff0a
[benchmark] Gardening: afterRunSleep is UInt32
palimondo Aug 30, 2018
bf4a343
[benchmark] Gardening: numSamples UInt vs Int
palimondo Aug 30, 2018
a03aede
[benchmark] Gardening: scale was always Int
palimondo Aug 30, 2018
f017d98
[benchmark] Refactor to report samples in μs
palimondo Aug 30, 2018
9c4876e
[benchmark] Refactor to currency type Int
palimondo Aug 30, 2018
46bef89
[benchmark] Gardening: DRYer verbose log
palimondo Aug 30, 2018
6e27af7
[benchmark] Gardening: Sensibly rename variables
palimondo Aug 30, 2018
3c55e30
[benchmark] Gardening: Documentation of numIters
palimondo Aug 30, 2018
46ee2a4
[benchmark] Refactor sampling loop with addSample
palimondo Aug 30, 2018
be39c02
[benchmark] Refactor numIters computation
palimondo Aug 30, 2018
265f537
[benchmark] Extract yield & add resetMeasurements
palimondo Aug 31, 2018
cdcb631
[benchmark] Refactor run runBenchmarks logVerbose
palimondo Aug 31, 2018
df3b385
[benchmark] Gardening: Fixed method indentation
palimondo Aug 31, 2018
ae7d82b
[benchmark] Gardening: Even nicer microseconds
palimondo Aug 31, 2018
6079d4f
[benchmark] Rename SampleRunner -> TestRunner
palimondo Aug 31, 2018
1f465b9
[benchmark] Report quantiles from samples
palimondo Aug 31, 2018
13e7c3f
[benchmark] Gardening maxRSS as Int?
palimondo Sep 2, 2018
313dfda
[benchmark] Option: delta encoded quantiles format
palimondo Sep 2, 2018
84bf158
[benchmark] Doctor explicitly measures memory
palimondo Sep 6, 2018
a56c55c
[benchmark] Round quantile idx to nearest or even
palimondo Sep 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/scripts/Benchmark_Driver
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ class BenchmarkDoctor(object):
measurements = dict(
[('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix),
self.driver.run(benchmark, num_samples=s, num_iters=i,
verbose=True))
verbose=True, measure_memory=True))
for o in opts
for s, i in run_args
for suffix in list('abcde')
Expand Down
19 changes: 16 additions & 3 deletions benchmark/scripts/compare_perf_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class `ReportFormatter` creates the test comparison report in specified format.
import sys
from bisect import bisect, bisect_left, bisect_right
from collections import namedtuple
from decimal import Decimal, ROUND_HALF_EVEN
from math import sqrt


Expand Down Expand Up @@ -141,20 +142,32 @@ def max(self):
"""Maximum sampled value."""
return self.samples[-1].runtime

def quantile(self, q):
    """Return runtime of a sample nearest to the quantile.

    Explicitly uses round-half-to-even rounding algorithm to match the
    behavior of numpy's quantile(interpolation='nearest') and quantile
    estimate type R-3, SAS-2. See:
    https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample

    `q` is the requested quantile as a fraction (0 selects the first
    sample, 1 the last); a float, int, Decimal or decimal string is
    accepted. The result is the `runtime` of the element of
    `self.samples` at the rounded position `(self.count - 1) * q`.

    NOTE(review): assumes `self.samples` is ordered by runtime and holds
    `self.count` entries; an empty sample set is not handled here.
    """
    if isinstance(q, float):
        # Decimal(0.1) captures the binary float's exact expansion
        # (0.1000000000000000055...), which nudges products such as
        # 5 * 0.1 just past the .5 tie and defeats half-even rounding.
        # repr() of a plain float is the shortest decimal that
        # round-trips, so ties are recognized as ties.
        q = repr(float(q))
    position = (self.count - 1) * Decimal(q)
    index = int(position.quantize(Decimal(1), rounding=ROUND_HALF_EVEN))
    return self.samples[index].runtime

@property
def median(self):
"""Median sampled value."""
return self.samples[self.count / 2].runtime
return self.quantile(0.5)

@property
def q1(self):
"""First Quartile (25th Percentile)."""
return self.samples[self.count / 4].runtime
return self.quantile(0.25)

@property
def q3(self):
"""Third Quartile (75th Percentile)."""
return self.samples[(self.count / 2) + (self.count / 4)].runtime
return self.quantile(0.75)

@property
def iqr(self):
Expand Down
4 changes: 2 additions & 2 deletions benchmark/scripts/test_Benchmark_Driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,10 +476,10 @@ def test_measure_10_independent_1s_benchmark_series(self):
# 5x i1 series, with 300 μs runtime it's possible to take 4098
# samples/s, but it should be capped at 2k
([(_run('B1', num_samples=2048, num_iters=1,
verbose=True), _PTR(min=300))] * 5) +
verbose=True, measure_memory=True), _PTR(min=300))] * 5) +
# 5x i2 series
([(_run('B1', num_samples=2048, num_iters=2,
verbose=True), _PTR(min=300))] * 5)
verbose=True, measure_memory=True), _PTR(min=300))] * 5)
))
doctor = BenchmarkDoctor(self.args, driver)
with captured_output() as (out, _):
Expand Down
12 changes: 6 additions & 6 deletions benchmark/scripts/test_compare_perf_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ def test_computes_five_number_summary(self):
self.samples, (1000, 1000, 1000, 1000, 1000))
self.samples.add(Sample(2, 1, 1100))
self.assertEqualFiveNumberSummary(
self.samples, (1000, 1000, 1100, 1100, 1100))
self.samples, (1000, 1000, 1000, 1100, 1100))
self.samples.add(Sample(3, 1, 1050))
self.assertEqualFiveNumberSummary(
self.samples, (1000, 1000, 1050, 1050, 1100))
self.samples, (1000, 1000, 1050, 1100, 1100))
self.samples.add(Sample(4, 1, 1025))
self.assertEqualFiveNumberSummary(
self.samples, (1000, 1025, 1050, 1100, 1100))
self.samples, (1000, 1025, 1050, 1050, 1100))
self.samples.add(Sample(5, 1, 1075))
self.assertEqualFiveNumberSummary(
self.samples, (1000, 1025, 1050, 1075, 1100))
Expand Down Expand Up @@ -156,11 +156,12 @@ def test_excludes_outliers_zero_IQR(self):
self.samples.add(Sample(0, 2, 23))
self.samples.add(Sample(1, 2, 18))
self.samples.add(Sample(2, 2, 18))
self.samples.add(Sample(3, 2, 18))
self.assertEquals(self.samples.iqr, 0)

self.samples.exclude_outliers()

self.assertEquals(self.samples.count, 2)
self.assertEquals(self.samples.count, 3)
self.assertEqualStats(
(self.samples.min, self.samples.max), (18, 18))

Expand Down Expand Up @@ -368,7 +369,6 @@ def test_parse_results_verbose(self):
Sample 0,11812
Measuring with scale 90.
Sample 1,13898
Measuring with scale 91.
Sample 2,11467
1,AngryPhonebook,3,11467,13898,12392,1315,11812
Running Array2D for 3 samples.
Expand All @@ -388,7 +388,7 @@ def test_parse_results_verbose(self):
)
self.assertEquals(r.num_samples, r.samples.num_samples)
self.assertEquals(results[0].samples.all_samples,
[(0, 78, 11812), (1, 90, 13898), (2, 91, 11467)])
[(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)])

r = results[1]
self.assertEquals(
Expand Down
Loading