
Commit c056e63

Merge pull request #61559 from tbkka/tbkka-benchmarking
Overhaul Benchmarking pipeline to use complete sample data, not summaries
2 parents a478034 + 961a38b commit c056e63

File tree

6 files changed: +943 -1129 lines changed


benchmark/scripts/Benchmark_Driver

Lines changed: 66 additions & 62 deletions
@@ -88,9 +88,10 @@ class BenchmarkDriver(object):
     def test_harness(self):
         """Full path to test harness binary."""
         suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
+        suffix += "-"
         if hasattr(self.args, "architecture") and self.args.architecture:
-            suffix += "-" + self.args.architecture + "*"
-        pattern = os.path.join(self.args.tests, "Benchmark_" + suffix)
+            suffix += self.args.architecture
+        pattern = os.path.join(self.args.tests, "Benchmark_" + suffix + "*")
         executables = []
         if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
             executables = [pattern]
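
The net effect of this hunk: the trailing glob is now always appended, so an -O build with no explicit architecture still matches Benchmark_O-<arch> binaries. A minimal standalone sketch of the new pattern construction, using hypothetical stand-ins for self.args:

import os

# Hypothetical values standing in for self.args; not part of the driver itself.
tests_dir = "/benchmarks/bin"
optimization = "O"
architecture = None  # no architecture given

suffix = optimization
suffix += "-"
if architecture:
    suffix += architecture
pattern = os.path.join(tests_dir, "Benchmark_" + suffix + "*")
print(pattern)  # /benchmarks/bin/Benchmark_O-*  (matches any architecture)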
@@ -134,48 +135,52 @@ class BenchmarkDriver(object):
 
     @property
     def _cmd_list_benchmarks(self):
-        # Use tab delimiter for easier parsing to override the default comma.
-        # (The third 'column' is always comma-separated list of tags in square
-        # brackets -- currently unused here.)
-        return [self.test_harness, "--list", "--delim=\t"] + (
+        # TODO: Switch to JSON format: add "--json" here
+        return [self.test_harness, "--list"] + (
             ["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
         )
 
     def _get_tests(self):
         """Return a list of performance tests to run."""
-        number_name_pairs = [
-            line.split("\t")[:2]
-            for line in self._invoke(self._cmd_list_benchmarks).split("\n")[1:-1]
-        ]
-        # unzip list of pairs into 2 lists
-        test_numbers, self.all_tests = map(list, zip(*number_name_pairs))
-        self.test_number = dict(zip(self.all_tests, test_numbers))
+        lines = self._invoke(self._cmd_list_benchmarks).split("\n")
+        json_tests = []
+        for line in lines:
+            columns = re.split(r'[ ,]+', line.strip())
+            try:
+                number = int(columns[0])
+                name = columns[1]
+                json_descr = {"number": number, "name": name}
+                json_tests.append(json_descr)
+            except Exception:
+                continue
+            # TODO: Replace the above with the following to
+            # use the JSON output from the benchmark driver
+            # directly
+            # if line.strip() != "":
+            #     json_tests.append(json.loads(line))
+        self.all_tests = [json["name"] for json in json_tests]
+        test_numbers = [json["number"] for json in json_tests]
+        self.test_number = dict([(json["name"], json["number"]) for json in json_tests])
         if self.args.filters:
             return self._tests_matching_patterns()
         if self.args.benchmarks:
             return self._tests_by_name_or_number(test_numbers)
         return self.all_tests
 
     def _tests_matching_patterns(self):
-        regexes = [re.compile(pattern) for pattern in self.args.filters]
-        return sorted(
-            list(
-                set(
-                    [
-                        name
-                        for pattern in regexes
-                        for name in self.all_tests
-                        if pattern.match(name)
-                    ]
-                )
-            )
-        )
+        matches = set()
+        for fil in self.args.filters:
+            pattern = re.compile(fil)
+            new_matches = filter(pattern.match, self.all_tests)
+            matches = matches.union(new_matches)
+        return sorted(list(matches))
 
     def _tests_by_name_or_number(self, test_numbers):
         benchmarks = set(self.args.benchmarks)
-        number_to_name = dict(zip(test_numbers, self.all_tests))
+        numbers = list(map(str, test_numbers))
+        number_to_name = dict(zip(numbers, self.all_tests))
         tests_by_number = [
-            number_to_name[i] for i in benchmarks.intersection(set(test_numbers))
+            number_to_name[i] for i in benchmarks.intersection(numbers)
         ]
         return sorted(
             list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
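
The interim parser above works on the comma-separated --list output: any line whose first column does not parse as an integer (the header, blank lines) is simply skipped. A small sketch of the same logic applied to made-up listing lines (illustrative input, not actual harness output):

import re

sample_output = "#,Test,[Tags]\n1,Ackermann,[algorithm]\n2,AngryPhonebook,[String, api]\n"

json_tests = []
for line in sample_output.split("\n"):
    columns = re.split(r'[ ,]+', line.strip())
    try:
        json_tests.append({"number": int(columns[0]), "name": columns[1]})
    except (ValueError, IndexError):
        # The header and blank lines fail int() or indexing and are skipped.
        continue

print(json_tests)
# [{'number': 1, 'name': 'Ackermann'}, {'number': 2, 'name': 'AngryPhonebook'}]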
@@ -188,20 +193,22 @@ class BenchmarkDriver(object):
         num_iters=None,
         sample_time=None,
         verbose=None,
-        measure_memory=False,
-        quantile=None,
+        measure_memory=False
     ):
         """Execute benchmark and gather results."""
         num_samples = num_samples or 0
         num_iters = num_iters or 0  # automatically determine N to run for 1s
         sample_time = sample_time or 0  # default is 1s
 
         cmd = self._cmd_run(
-            test, num_samples, num_iters, sample_time, verbose, measure_memory, quantile
+            test, num_samples, num_iters, sample_time, verbose, measure_memory
         )
         output = self._invoke(cmd)
         results = self.parser.results_from_string(output)
-        return list(results.items())[0][1] if test else results
+        if test:
+            return list(results.items())[0][1]
+        else:
+            return results
 
     def _cmd_run(
         self,
@@ -210,14 +217,13 @@ class BenchmarkDriver(object):
         num_iters,
         sample_time,
         verbose,
-        measure_memory,
-        quantile,
+        measure_memory
     ):
         cmd = [self.test_harness]
         if test:
             cmd.append(test)
         else:
-            cmd.extend([self.test_number.get(name, name) for name in self.tests])
+            cmd.extend([str(self.test_number.get(name, name)) for name in self.tests])
         if num_samples > 0:
             cmd.append("--num-samples={0}".format(num_samples))
         if num_iters > 0:
@@ -228,9 +234,8 @@ class BenchmarkDriver(object):
             cmd.append("--verbose")
         if measure_memory:
             cmd.append("--memory")
-        if quantile:
-            cmd.append("--quantile={0}".format(quantile))
-            cmd.append("--delta")
+        # TODO: Uncomment this as soon as the new Benchmark Swift logic is available everywhere
+        # cmd.append("--json")
         return cmd
 
     def run_independent_samples(self, test):
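
With the --quantile/--delta flags gone, the command assembled by _cmd_run for one independent-samples run reduces to roughly the following. The binary path is made up, and the spelling of the iteration flag is an assumption, since its append happens outside this hunk:

# Illustrative result of self._cmd_run("AngryPhonebook", 0, 1, 0, None, True)
cmd = [
    "/benchmarks/bin/Benchmark_O-arm64",  # self.test_harness
    "AngryPhonebook",                     # a named test is passed through as-is
    "--num-iters=1",                      # assumed flag spelling; appended outside this hunk
    "--memory",
    # "--json",  # to be appended once the new Benchmark Swift logic ships everywhere
]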
@@ -246,12 +251,12 @@ class BenchmarkDriver(object):
         return functools.reduce(
             merge_results,
             [
-                self.run(test, measure_memory=True, num_iters=1, quantile=20)
+                self.run(test, measure_memory=True, num_iters=1)
                 for _ in range(self.args.independent_samples)
             ],
         )
 
-    def log_results(self, output, log_file=None):
+    def log_results(self, results, log_file=None):
         """Log output to `log_file`.
 
         Creates `args.output_dir` if it doesn't exist yet.
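
run_independent_samples still folds the per-run results together with functools.reduce; only the inner self.run call changed. A minimal sketch of that folding pattern with a stand-in merge function (merge_results itself is defined outside this hunk and operates on full result objects, not plain lists):

import functools

def merge_results(acc, other):
    # Stand-in only: here "results" are plain sample lists and merging is concatenation.
    return acc + other

independent_runs = [[3200, 3185], [3190, 3201], [3170, 3188]]
merged = functools.reduce(merge_results, independent_runs)
print(merged)  # [3200, 3185, 3190, 3201, 3170, 3188]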
@@ -262,7 +267,8 @@ class BenchmarkDriver(object):
             os.makedirs(dir)
         print("Logging results to: %s" % log_file)
         with open(log_file, "w") as f:
-            f.write(output)
+            for r in results:
+                print(r, file=f)
 
     RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"
 
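Since log_results now writes one entry per line, and run_benchmarks (later in this diff) hands it each result's json form, the log file can be treated as line-delimited JSON. A sketch of reading it back, under the assumption that r.json renders one JSON object per result:

import json

def read_results(log_file):
    # Assumption: one JSON object per line (r.json for one benchmark).
    with open(log_file) as f:
        return [json.loads(line) for line in f if line.strip()]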

@@ -284,25 +290,25 @@ class BenchmarkDriver(object):
         def console_log(values):
             print(format(values))
 
-        def result_values(r):
+        def summary(r):
             return list(
                 map(
                     str,
                     [
                         r.test_num,
                         r.name,
                         r.num_samples,
-                        r.min,
-                        r.samples.q1,
+                        r.min_value,
+                        r.q1,
                         r.median,
-                        r.samples.q3,
-                        r.max,
+                        r.q3,
+                        r.max_value,
                         r.max_rss,
                     ],
                 )
             )
 
-        header = [
+        summary_header = [
             "#",
             "TEST",
             "SAMPLES",
@@ -313,25 +319,23 @@ class BenchmarkDriver(object):
             "MAX(μs)",
             "MAX_RSS(B)",
         ]
-        console_log(header)
-        results = [header]
+        console_log(summary_header)
+        results = []
         for test in self.tests:
-            result = result_values(self.run_independent_samples(test))
-            console_log(result)
+            result = self.run_independent_samples(test)
+            console_log(summary(result))
             results.append(result)
 
         print("\nTotal performance tests executed: {0}".format(len(self.tests)))
-        return (
-            None if csv_console else ("\n".join([",".join(r) for r in results]) + "\n")
-        )  # csv_log
+        return results
 
     @staticmethod
     def run_benchmarks(args):
         """Run benchmarks and log results."""
         driver = BenchmarkDriver(args)
-        csv_log = driver.run_and_log(csv_console=(args.output_dir is None))
-        if csv_log:
-            driver.log_results(csv_log)
+        results = driver.run_and_log(csv_console=(args.output_dir is None))
+        if args.output_dir:
+            driver.log_results([r.json for r in results])
         return 0
 
 
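The console summary now reads q1/median/q3 and min_value/max_value directly off the result, which carries the complete sample data this PR switches to. A rough sketch of how such quartiles can be derived from raw samples (an illustration only, not the result class's actual code):

def quartiles(samples):
    s = sorted(samples)
    def q(fraction):
        # Nearest-rank style index; a real implementation may interpolate.
        return s[min(len(s) - 1, int(fraction * len(s)))]
    return q(0.25), q(0.5), q(0.75)

q1, median, q3 = quartiles([3170, 3185, 3188, 3190, 3200, 3201])
print(q1, median, q3)  # 3185 3190 3200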

@@ -445,7 +449,6 @@ class BenchmarkDoctor(object):
         Optional `driver` parameter for injecting dependency; used for testing.
         """
         super(BenchmarkDoctor, self).__init__()
-        self.driver = driver or BenchmarkDriver(args)
         self.results = {}
 
         if hasattr(args, "markdown") and args.markdown:
@@ -458,6 +461,7 @@ class BenchmarkDoctor(object):
             self.console_handler.setLevel(
                 logging.DEBUG if args.verbose else logging.INFO
             )
+        self.driver = driver or BenchmarkDriver(args)
         self.log.addHandler(self.console_handler)
         self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
         self.requirements = [
@@ -532,7 +536,7 @@ class BenchmarkDoctor(object):
             correction = setup / i
             i_series = BenchmarkDoctor._select(measurements, num_iters=i)
             for result in i_series:
-                runtimes.append(result.samples.min - correction)
+                runtimes.append(result.min_value - correction)
         runtime = min(runtimes)
 
         threshold = 1000
@@ -584,7 +588,7 @@ class BenchmarkDoctor(object):
         ti1, ti2 = [
             float(min(mins))
             for mins in [
-                [result.samples.min for result in i_series]
+                [result.min_value for result in i_series]
                 for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
             ]
         ]
@@ -679,7 +683,7 @@ class BenchmarkDoctor(object):
         r = self.driver.run(
             benchmark, num_samples=3, num_iters=1, verbose=True
         )  # calibrate
-        num_samples = self._adjusted_1s_samples(r.samples.min)
+        num_samples = self._adjusted_1s_samples(r.min_value)
 
         def capped(s):
             return min(s, 200)
@@ -689,7 +693,7 @@ class BenchmarkDoctor(object):
         opts = opts if isinstance(opts, list) else [opts]
         self.log.debug(
             "Runtime {0} μs yields {1} adjusted samples per second.".format(
-                r.samples.min, num_samples
+                r.min_value, num_samples
             )
         )
         self.log.debug(
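
Every BenchmarkDoctor call site above moves from r.samples.min to r.min_value: the minimum becomes a first-class property of the result rather than a field on a nested summary, in line with keeping the complete sample data around. A minimal sketch of a result object shaped that way (the class name and everything except min_value/max_value are assumptions):

class ResultSketch:
    """Illustrative shape only; the real result class differs."""
    def __init__(self, samples):
        self.samples = samples  # complete sample data is retained

    @property
    def min_value(self):
        return min(self.samples)

    @property
    def max_value(self):
        return max(self.samples)

r = ResultSketch([3170, 3185, 3200])
print(r.min_value, r.max_value)  # 3170 3200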
