-
Notifications
You must be signed in to change notification settings - Fork 10.5k
Support Python 3 in the benchmark suite #30085
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ class `BenchmarkDoctor` analyzes performance tests, implements `check` COMMAND. | |
""" | ||
|
||
import argparse | ||
import functools | ||
import glob | ||
import logging | ||
import math | ||
|
@@ -64,7 +65,9 @@ class BenchmarkDriver(object): | |
os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1" | ||
|
||
def _invoke(self, cmd): | ||
return self._subprocess.check_output(cmd, stderr=self._subprocess.STDOUT) | ||
return self._subprocess.check_output( | ||
cmd, stderr=self._subprocess.STDOUT, universal_newlines=True | ||
) | ||
|
||
@property | ||
def test_harness(self): | ||
|
@@ -165,7 +168,7 @@ class BenchmarkDriver(object): | |
) | ||
output = self._invoke(cmd) | ||
results = self.parser.results_from_string(output) | ||
return results.items()[0][1] if test else results | ||
return list(results.items())[0][1] if test else results | ||
|
||
def _cmd_run( | ||
self, | ||
|
@@ -207,7 +210,7 @@ class BenchmarkDriver(object): | |
a.merge(b) | ||
return a | ||
|
||
return reduce( | ||
return functools.reduce( | ||
merge_results, | ||
[ | ||
self.run(test, measure_memory=True, num_iters=1, quantile=20) | ||
|
@@ -249,19 +252,21 @@ class BenchmarkDriver(object): | |
print(format(values)) | ||
|
||
def result_values(r): | ||
return map( | ||
str, | ||
[ | ||
r.test_num, | ||
r.name, | ||
r.num_samples, | ||
r.min, | ||
r.samples.q1, | ||
r.median, | ||
r.samples.q3, | ||
r.max, | ||
r.max_rss, | ||
], | ||
return list( | ||
map( | ||
str, | ||
[ | ||
r.test_num, | ||
r.name, | ||
r.num_samples, | ||
r.min, | ||
r.samples.q1, | ||
r.median, | ||
r.samples.q3, | ||
r.max, | ||
r.max_rss, | ||
], | ||
) | ||
) | ||
|
||
header = [ | ||
|
@@ -370,7 +375,12 @@ class MarkdownReportHandler(logging.StreamHandler): | |
msg = self.format(record) | ||
stream = self.stream | ||
try: | ||
if isinstance(msg, unicode) and getattr(stream, "encoding", None): | ||
# In Python 2 Unicode strings have a special type | ||
unicode_type = unicode | ||
except NameError: | ||
unicode_type = str | ||
try: | ||
if isinstance(msg, unicode_type) and getattr(stream, "encoding", None): | ||
stream.write(msg.encode(stream.encoding)) | ||
else: | ||
stream.write(msg) | ||
|
@@ -487,16 +497,14 @@ class BenchmarkDoctor(object): | |
name = measurements["name"] | ||
setup, ratio = BenchmarkDoctor._setup_overhead(measurements) | ||
setup = 0 if ratio < 0.05 else setup | ||
runtime = min( | ||
[ | ||
(result.samples.min - correction) | ||
for i_series in [ | ||
BenchmarkDoctor._select(measurements, num_iters=i) | ||
for correction in [(setup / i) for i in [1, 2]] | ||
] | ||
for result in i_series | ||
] | ||
) | ||
|
||
runtimes = [] | ||
for i in range(1, 3): | ||
correction = setup / i | ||
i_series = BenchmarkDoctor._select(measurements, num_iters=i) | ||
for result in i_series: | ||
runtimes.append(result.samples.min - correction) | ||
runtime = min(runtimes) | ||
Comment on lines
-490
to
+507
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like whoever will review this to pay attention at this particular change. The old version relied on Python 2-only feature where the variables defined in a list comprehension leak to the function scope. But I believe this was erroneous: since in Python 2 list comprehensions are eagerly evaluated, the value of [
BenchmarkDoctor._select(measurements, num_iters=i)
for correction in [(setup / i) for i in [1, 2]]
] Here, I've rewritten it to be a stupid old for loop, which IMHO reads way better, and also seems to behave correctly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm… I’ll need to check this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’ve corrected the Python3 incompatibility by explicitly propagating the variables from the nested scopes. I don’t see any issue of incorrect evaluation. Here’s standalone simplified test case base on the original source code before the automatic formatter made mess of it in #29719: $ python
Python 2.7.10 (default, Feb 22 2019, 21:55:15)
[GCC 4.2.1 Compatible Apple LLVM 10.0.1 (clang-1001.0.37.14)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> setup = 134
>>> measurements = {1: [[324], [326], [325]], 2: [[257], [259], [258]]}
>>>
>>>
>>> def select(measurements, num_iters):
... return measurements[num_iters]
...
>>>
>>> runtimesP3 = [
... (result[0] - correction) for correction, i_series in
... [(correction, select(measurements, num_iters=i))
... for i, correction in [(i, setup // i) for i in [1, 2]]
... ] for result in i_series]
>>>
>>> runtimes = [
... (result[0] - correction) for i_series in
... [select(measurements, num_iters=i)
... for correction in [(setup / i) for i in [1, 2]]
... ] for result in i_series]
>>>
>>> runtimes == runtimesP3
True
>>> runtimesP3
[190, 192, 191, 190, 192, 191] It works correctly in Python 3, too: $ python3
Python 3.6.5 (default, May 24 2018, 10:57:11)
[GCC 4.2.1 Compatible Apple LLVM 9.1.0 (clang-902.0.39.1)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> setup = 134
>>> measurements = {1: [[324], [326], [325]], 2: [[257], [259], [258]]}
>>>
>>>
>>> def select(measurements, num_iters):
... return measurements[num_iters]
...
>>>
>>> runtimesP3 = [
... (result[0] - correction) for correction, i_series in
... [(correction, select(measurements, num_iters=i))
... for i, correction in [(i, setup // i) for i in [1, 2]]
... ] for result in i_series]
>>>
>>> runtimesP3
[190, 192, 191, 190, 192, 191] I’ll try to incorporate the Python 3 changes in #26462 (it already had fix for the |
||
|
||
threshold = 1000 | ||
if threshold < runtime: | ||
|
@@ -572,7 +580,9 @@ class BenchmarkDoctor(object): | |
|
||
@staticmethod | ||
def _reasonable_setup_time(measurements): | ||
setup = min([result.setup for result in BenchmarkDoctor._select(measurements)]) | ||
setup = min( | ||
[result.setup or 0 for result in BenchmarkDoctor._select(measurements)] | ||
) | ||
Comment on lines
-575
to
+585
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if 200000 < setup: # 200 ms | ||
BenchmarkDoctor.log_runtime.error( | ||
"'%s' setup took at least %d μs.", measurements["name"], setup | ||
|
@@ -857,6 +867,7 @@ def parse_args(args): | |
help="See COMMAND -h for additional arguments", | ||
metavar="COMMAND", | ||
) | ||
subparsers.required = True | ||
|
||
shared_benchmarks_parser = argparse.ArgumentParser(add_help=False) | ||
benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DTraceResult
's initializer takes exactly 4 arguments. I don't know why it worked before.