Commit 8fcf884

Merge pull request #21684 from palimondo/cells-interlinked
[benchmark] run_smoke_bench with BenchmarkDriver
2 parents 65abb49 + d854f0f commit 8fcf884

File tree: 3 files changed (+135, -122 lines)


benchmark/scripts/Benchmark_Driver

Lines changed: 32 additions & 20 deletions
@@ -55,6 +55,7 @@ class BenchmarkDriver(object):
         self.args = args
         self._subprocess = _subprocess or subprocess
         self.all_tests = []
+        self.test_number = {}
         self.tests = tests or self._get_tests()
         self.parser = parser or LogParser()
         self.results = {}
@@ -106,16 +107,17 @@ class BenchmarkDriver(object):
 
     def _get_tests(self):
         """Return a list of performance tests to run."""
-        index_name_pairs = [
+        number_name_pairs = [
            line.split('\t')[:2] for line in
            self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1]
        ]
         # unzip list of pairs into 2 lists
-        indices, self.all_tests = map(list, zip(*index_name_pairs))
+        test_numbers, self.all_tests = map(list, zip(*number_name_pairs))
+        self.test_number = dict(zip(self.all_tests, test_numbers))
         if self.args.filters:
             return self._tests_matching_patterns()
         if self.args.benchmarks:
-            return self._tests_by_name_or_index(indices)
+            return self._tests_by_name_or_number(test_numbers)
         return self.all_tests
 
     def _tests_matching_patterns(self):
@@ -124,33 +126,44 @@ class BenchmarkDriver(object):
                 for name in self.all_tests
                 if pattern.match(name)])))
 
-    def _tests_by_name_or_index(self, indices):
+    def _tests_by_name_or_number(self, test_numbers):
         benchmarks = set(self.args.benchmarks)
-        index_to_name = dict(zip(indices, self.all_tests))
-        indexed_names = [index_to_name[i]
-                         for i in benchmarks.intersection(set(indices))]
-        return sorted(list(
-            benchmarks.intersection(set(self.all_tests)).union(indexed_names)))
-
-    def run(self, test, num_samples=None, num_iters=None,
-            verbose=None, measure_memory=False, quantile=None):
+        number_to_name = dict(zip(test_numbers, self.all_tests))
+        tests_by_number = [number_to_name[i]
+                           for i in benchmarks.intersection(set(test_numbers))]
+        return sorted(list(benchmarks
+                           .intersection(set(self.all_tests))
+                           .union(tests_by_number)))
+
+    def run(self, test=None, num_samples=None, num_iters=None,
+            sample_time=None, verbose=None, measure_memory=False,
+            quantile=None):
         """Execute benchmark and gather results."""
         num_samples = num_samples or 0
         num_iters = num_iters or 0  # automatically determine N to run for 1s
+        sample_time = sample_time or 0  # default is 1s
 
         cmd = self._cmd_run(
-            test, num_samples, num_iters, verbose, measure_memory, quantile)
+            test, num_samples, num_iters, sample_time,
+            verbose, measure_memory, quantile)
         output = self._invoke(cmd)
-        result = self.parser.results_from_string(output).items()[0][1]
-        return result
-
-    def _cmd_run(self, test, num_samples, num_iters, verbose, measure_memory,
-                 quantile):
-        cmd = [self.test_harness, test]
+        results = self.parser.results_from_string(output)
+        return results.items()[0][1] if test else results
+
+    def _cmd_run(self, test, num_samples, num_iters, sample_time,
+                 verbose, measure_memory, quantile):
+        cmd = [self.test_harness]
+        if test:
+            cmd.append(test)
+        else:
+            cmd.extend([self.test_number.get(name, name)
+                        for name in self.tests])
         if num_samples > 0:
             cmd.append('--num-samples={0}'.format(num_samples))
         if num_iters > 0:
             cmd.append('--num-iters={0}'.format(num_iters))
+        if sample_time > 0:
+            cmd.append('--sample-time={0}'.format(sample_time))
         if verbose:
             cmd.append('--verbose')
         if measure_memory:
@@ -198,7 +211,6 @@ class BenchmarkDriver(object):
         from this method. When `csv_console` is False, the console output
         format is justified columns.
         """
-
         format = (
             (lambda values: ','.join(values)) if csv_console else
             (lambda values: self.RESULT.format(*values)))  # justified columns
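
For orientation, here is a minimal usage sketch of the reworked run() API (not part of the diff). It assumes a driver built via the DriverArgs helper added to run_smoke_bench below; the build-dir path and the 'Ackermann' benchmark name are illustrative placeholders.

# Sketch only: single-test vs. batch runs with the new run() signature.
# '/path/to/build-dir' and 'Ackermann' are placeholders.
driver = BenchmarkDriver(DriverArgs('/path/to/build-dir', optimization='O'))

# Single test: one PerformanceTestResult is returned.
result = driver.run('Ackermann', num_samples=3, sample_time=0.0025)

# Batch mode (no test given): every test in driver.tests runs in a single
# Benchmark_O invocation, identified by test number where known, and a
# dictionary of {test name: PerformanceTestResult} is returned.
results = driver.run(num_samples=3, sample_time=0.0025)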

benchmark/scripts/run_smoke_bench

Lines changed: 74 additions & 100 deletions
@@ -26,15 +26,34 @@ from __future__ import print_function
 import argparse
 import glob
 import os
-import re
 import subprocess
 import sys
 
-from compare_perf_tests import LogParser, create_report
+from compare_perf_tests import LogParser, TestComparator, create_report
+
+from imp import load_source
+# import Benchmark_Driver  # doesn't work because it misses '.py' extension
+Benchmark_Driver = load_source(
+    'Benchmark_Driver', os.path.join(os.path.dirname(
+        os.path.abspath(__file__)), 'Benchmark_Driver'))
+# from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ...
+BenchmarkDriver = Benchmark_Driver.BenchmarkDriver
+BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor
+MarkdownReportHandler = Benchmark_Driver.MarkdownReportHandler
 
 VERBOSE = False
 
 
+class DriverArgs(object):
+    """Arguments for BenchmarkDriver."""
+    def __init__(self, tests, optimization='O'):
+        """Initialize with path to the build-dir and optimization level."""
+        self.benchmarks = None
+        self.filters = None
+        self.tests = os.path.join(tests, 'bin')
+        self.optimization = optimization
+
+
 def log(msg):
     print(msg)
     sys.stdout.flush()
@@ -129,89 +148,61 @@ def test_opt_levels(args):
     return 0
 
 
-def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
-                     output_file):
-    num_results_dont_differ = 0
-    iter = 1
-    to_test = None
-    prev_num_tests = None
+def measure(driver, tests, i):
+    """Log and measure samples of the tests with the given driver.
 
-    old_lines = ""
-    new_lines = ""
+    Collect increasing number of samples, depending on the iteration.
+    """
+    num_samples = min(i + 3, 10)
+    msg = '  Iteration {0} for {1}: num samples = {2}, '.format(
        i, driver.args.tests, num_samples)
+    msg += ('running all tests' if driver.all_tests == tests else
+            're-testing {0} tests'.format(len(tests)))
+    log(msg)
+    driver.tests = tests
+    return driver.run(num_samples=num_samples, sample_time=0.0025)
 
-    # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),PEAK_MEMORY(B)
-    score_re = re.compile(r"(\d+),([\w.\-]+),\d+,(\d+)")
-
-    while to_test is None or len(to_test) > 0:
-        tested_benchmarks = set()
-
-        # (benchmark_name, benchmark_directory) -> (min_value, result_line)
-        values = {}
-
-        # Run the benchmarks and store the results in 'values'.
-        for bench_dir in (old_dir, new_dir):
-            log('  Iteration ' + str(iter) + ' for ' + bench_dir +
                ': num samples = ' + str(num_samples) +
                (', running all tests' if to_test is None
                 else ', re-testing ' + str(len(to_test)) + ' tests'))
-
-            result = get_results(bench_dir, opt_level, num_samples, to_test)
-            for line in result.splitlines():
-                m = score_re.match(line)
-                if m:
-                    testname = m.group(2)
-                    val = int(m.group(3))
-                    values[(testname, bench_dir)] = (val, line)
-                    tested_benchmarks.add(testname)
-
-        # Some local utility functions
-
-        def bench_in(bench, bench_dir):
-            return (bench, bench_dir) in values
-
-        def within_threshold(bench):
-            old_val = values[(bench, old_dir)][0]
-            new_val = values[(bench, new_dir)][0]
-            if not new_val:
-                return True
-            f = float(old_val) / float(new_val)
-            return f >= 1.0 - threshold and f <= 1.0 + threshold
-
-        def result_line(bench, bench_dir):
-            result_line = values[(bench, bench_dir)][1]
-            return result_line + '\n'
-
-        # Check which benchmarks are added/removed and which need to be re-run
-        to_test = []
-        for bench in sorted(tested_benchmarks):
-            if bench_in(bench, old_dir) and not bench_in(bench, new_dir):
-                old_lines += result_line(bench, old_dir)
-            elif bench_in(bench, new_dir) and not bench_in(bench, old_dir):
-                new_lines += result_line(bench, new_dir)
-            elif within_threshold(bench) or num_results_dont_differ >= 4:
-                old_lines += result_line(bench, old_dir)
-                new_lines += result_line(bench, new_dir)
-            else:
-                to_test.append(bench)
-                if VERBOSE:
-                    log('  test again ' + bench)
-
-        # Track how many times we could not reduce the number of benchmarks
-        if prev_num_tests == len(to_test):
-            num_results_dont_differ += 1
-        else:
-            num_results_dont_differ = 0
-        prev_num_tests = len(to_test)
 
-        # Increase the number of samples for benchmarks which re-run
-        if num_samples < 10:
-            num_samples += 1
+def merge(results, other_results):
+    """Merge the other PerformanceTestResults into the first dictionary."""
+    for test, result in other_results.items():
+        results[test].merge(result)
+    return results
 
-        iter += 1
+
+def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
+                     output_file):
+    """Detect performance changes in benchmarks.
+
+    Start fast with few samples per benchmark and gradually spend more time
+    gathering more precise measurements of the change candidates.
+    """
+
+    i, unchanged_length_count = 0, 0
+    old, new = [BenchmarkDriver(DriverArgs(dir, optimization=opt_level))
                for dir in [old_dir, new_dir]]
+    results = [measure(driver, driver.tests, i) for driver in [old, new]]
+    tests = TestComparator(results[0], results[1], threshold)
+    changed = tests.decreased + tests.increased
+
+    while len(changed) > 0 and unchanged_length_count < 5:
+        i += 1
+        if VERBOSE:
+            log('  test again: ' + str([test.name for test in changed]))
+        results = [merge(the_results,
                         measure(driver, [test.name for test in changed], i))
                   for the_results, driver in zip(results, [old, new])]
+        tests = TestComparator(results[0], results[1], threshold)
+        changed = tests.decreased + tests.increased
+
+        if len(old.tests) == len(changed):
+            unchanged_length_count += 1
+        else:
+            unchanged_length_count = 0
 
     log('')
-    return report_results("Performance: -" + opt_level,
                          old_lines, new_lines, threshold * 1.4, output_file)
+    return report_results("Performance: -" + opt_level, None, None,
                          threshold * 1.4, output_file, *results)
 
 
 def get_results(bench_dir, opt_level, num_samples, to_test):
@@ -274,9 +265,10 @@ def get_codesize(filename):
     return int(data_line.split('\t')[0])
 
 
-def report_results(title, old_lines, new_lines, threshold, output_file):
-    old_results = LogParser.results_from_string(old_lines)
-    new_results = LogParser.results_from_string(new_lines)
+def report_results(title, old_lines, new_lines, threshold, output_file,
                   old_results=None, new_results=None):
+    old_results = old_results or LogParser.results_from_string(old_lines)
+    new_results = new_results or LogParser.results_from_string(new_lines)
 
     print("------- " + title + " -------")
     print(create_report(old_results, new_results, threshold, 'git'))
@@ -332,25 +324,7 @@ performance team (@eeckstein).
     return text
 
 
-class DriverArgs(object):
-    def __init__(self, tests):
-        self.benchmarks = None
-        self.filters = None
-        self.tests = os.path.join(tests, 'bin')
-        self.optimization = 'O'
-
-
 def check_added(args, output_file=None):
-    from imp import load_source
-    # import Benchmark_Driver  # doesn't work because it misses '.py' extension
-    Benchmark_Driver = load_source(
-        'Benchmark_Driver', os.path.join(os.path.dirname(
-            os.path.abspath(__file__)), 'Benchmark_Driver'))
-    # from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor
-    BenchmarkDriver = Benchmark_Driver.BenchmarkDriver
-    BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor
-    MarkdownReportHandler = Benchmark_Driver.MarkdownReportHandler
-
     old = BenchmarkDriver(DriverArgs(args.oldbuilddir[0]))
     new = BenchmarkDriver(DriverArgs(args.newbuilddir[0]))
     added = set(new.tests).difference(set(old.tests))
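
Taken together, measure, merge, and test_performance form an adaptive loop: both builds are first measured with a few samples per test, TestComparator selects the change candidates, and only those candidates are re-measured with progressively more samples and merged into the accumulated results until nothing changes any more. A simplified sketch of that loop follows (not part of the commit; the guard against a non-shrinking changed set and the final report step are elided, the paths are placeholders, and the 0.05 threshold is illustrative).

# Simplified sketch of the adaptive re-measurement loop described above.
old = BenchmarkDriver(DriverArgs('/path/to/old-build', optimization='O'))
new = BenchmarkDriver(DriverArgs('/path/to/new-build', optimization='O'))

results = [measure(d, d.tests, 0) for d in (old, new)]  # few samples, all tests
comparison = TestComparator(results[0], results[1], 0.05)
changed = comparison.decreased + comparison.increased

i = 0
while changed:  # re-measure only the change candidates
    i += 1
    names = [t.name for t in changed]
    results = [merge(r, measure(d, names, i))  # more samples on each pass
               for r, d in zip(results, (old, new))]
    comparison = TestComparator(results[0], results[1], 0.05)
    changed = comparison.decreased + comparison.increased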

benchmark/scripts/test_Benchmark_Driver.py

Lines changed: 29 additions & 2 deletions
@@ -181,14 +181,16 @@ def test_test_harness(self):
     def test_gets_list_of_precommit_benchmarks(self):
         self.subprocess_mock.expect(
             '/benchmarks/Benchmark_O --list --delim=\t'.split(' '),
-            '#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n1\tBenchmark2\t[t3]\n')
+            '#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n')
         driver = BenchmarkDriver(
             self.args, _subprocess=self.subprocess_mock)
         self.subprocess_mock.assert_called_all_expected()
         self.assertEquals(driver.tests,
                           ['Benchmark1', 'Benchmark2'])
         self.assertEquals(driver.all_tests,
                           ['Benchmark1', 'Benchmark2'])
+        self.assertEquals(driver.test_number['Benchmark1'], "1")
+        self.assertEquals(driver.test_number['Benchmark2'], "2")
 
     list_all_tests = (
         '/benchmarks/Benchmark_O --list --delim=\t --skip-tags='.split(' '),
@@ -281,14 +283,39 @@ def test_run_benchmark_with_specified_number_of_iterations(self):
         self.subprocess_mock.assert_called_with(
             ('/benchmarks/Benchmark_O', 'b', '--num-iters=1'))
 
+    def test_run_benchmark_for_specified_time(self):
+        self.driver.run('b', sample_time=0.5)
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', 'b', '--sample-time=0.5'))
+
     def test_run_benchmark_in_verbose_mode(self):
         self.driver.run('b', verbose=True)
         self.subprocess_mock.assert_called_with(
             ('/benchmarks/Benchmark_O', 'b', '--verbose'))
 
+    def test_run_batch(self):
+        """Run all active tests in a single execution of the Benchmark_X.
+
+        Known test names are passed to the harness in a compressed form as
+        test numbers.
+        """
+        self.driver.tests = ['b1', 'bx']
+        self.driver.run()
+        self.subprocess_mock.assert_called_with(
+            ('/benchmarks/Benchmark_O', '1', 'bx'))
+
     def test_parse_results_from_running_benchmarks(self):
-        self.driver.run('b')
+        """Parse measurement results using LogParser.
+
+        Individual test run returns the first PerformanceTestResult directly.
+        Batch run returns the dictionary of PerformanceTestResults.
+        """
+        r = self.driver.run('b')
         self.assertTrue(self.parser_stub.results_from_string_called)
+        self.assertEquals(r.name, 'b1')  # non-matching name, just 1st result
+        r = self.driver.run()
+        self.assertTrue(isinstance(r, dict))
+        self.assertEquals(r['b1'].name, 'b1')
 
     def test_measure_memory(self):
         self.driver.run('b', measure_memory=True)
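
For reference (not from the commit), the name-to-number compression that test_run_batch exercises boils down to the test_number dictionary built from the harness's --list output: known names are replaced by their numbers on the command line, and unknown names pass through unchanged. A small illustration, assuming the listing from the first test above:

# --list paired numbers with names: 1 Benchmark1, 2 Benchmark2.
driver.test_number == {'Benchmark1': '1', 'Benchmark2': '2'}

# In batch mode the command line uses numbers for known tests and the raw
# name otherwise ('bx' stands for an unknown test, as in test_run_batch):
[driver.test_number.get(name, name) for name in ['Benchmark1', 'bx']]
# -> ['1', 'bx']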
