@@ -88,11 +88,9 @@ def main():
         help='In addition to stdout, write the results into a markdown file')
     argparser.add_argument(
         '-threshold', type=float,
-        help='The performance threshold in %% which triggers a re-run',
-        default=5)
-    argparser.add_argument(
-        '-num-samples', type=int,
-        help='The (minimum) number of samples to run', default=3)
+        help='The performance threshold in %% which triggers a re-run'
+             ' (default: 5)',
+        default=5.0)
     argparser.add_argument(
         '-num-reruns', type=int,
         help="The number of re-runs until it's assumed to be a real change",
@@ -123,8 +121,9 @@ def test_opt_levels(args):
         if not args.skip_performance:
             if test_performance(opt_level, args.oldbuilddir[0],
                                 args.newbuilddir[0],
-                                float(args.threshold) / 100, args.num_samples,
-                                args.num_reruns, output_file):
+                                args.threshold / 100,
+                                args.num_reruns,
+                                output_file):
                 changes = True

         # There is no point in reporting code size for Onone.
@@ -145,7 +144,7 @@ def test_opt_levels(args):

     if output_file:
         if changes:
-            output_file.write(get_info_text())
+            output_file.write(get_info_text(args.threshold))
         else:
             output_file.write("### No performance and code size changes")
         output_file.close()
@@ -154,13 +153,14 @@ def test_opt_levels(args):

 def measure(driver, tests, i):
     """Log and measure samples of the tests with the given driver."""
-    msg = ' Iteration {0} for {1}:, '.format(i, driver.args.tests)
+    msg = ' Iteration {0} for {1}: '.format(i, driver.args.tests)
     msg += ('running all tests' if driver.all_tests == tests else
             're-testing {0} tests'.format(len(tests)))
     log(msg)
     driver.tests = tests
     return driver.run(
-        num_iters=1, min_samples=10, sample_time=0.05, quantile=20)
+        num_iters=1, min_samples=10, sample_time=0.05, quantile=20,
+        gather_metadata=True)


 def merge(results, other_results):
@@ -170,21 +170,20 @@ def merge(results, other_results):
     return results


-def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
+def test_performance(opt_level, old_dir, new_dir, threshold,
                      num_reruns, output_file):
     """Detect performance changes in benchmarks.

     Gather more independent measurements of the change candidates.
     """
-
-    i, run_count = 0, 0
+    i = 0
     old, new = [BenchmarkDriver(DriverArgs(dir, optimization=opt_level))
                 for dir in [old_dir, new_dir]]
     results = [measure(driver, driver.tests, i) for driver in [old, new]]
     tests = TestComparator(results[0], results[1], threshold)
     changed = tests.decreased + tests.increased + tests.added

-    while len(changed) > 0 and run_count < num_reruns:
+    while len(changed) > 0 and i < num_reruns:
         i += 1
         if VERBOSE:
             log(' test again: ' + str([test.name for test in changed]))
@@ -193,11 +192,10 @@ def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
                    for the_results, driver in zip(results, [old, new])]
         tests = TestComparator(results[0], results[1], threshold)
         changed = tests.decreased + tests.increased + tests.added
-        run_count += 1

     log('')
     return report_results("Performance: -" + opt_level, None, None,
-                          threshold * 1.4, output_file, *results)
+                          threshold, output_file, *results)


 def report_code_size(opt_level, old_dir, new_dir, platform, output_file):
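The two hunks above rework test_performance(): the num_samples parameter disappears from the signature, and the loop counter i now doubles as the re-run counter, replacing the separate run_count variable. Below is a simplified, self-contained sketch of that re-run control flow; compare and measure_changed are hypothetical stand-ins for the script's TestComparator/measure/merge machinery, not its real API.

def rerun_until_stable(compare, measure_changed, num_reruns):
    """Re-measure flagged tests until they stop changing or the re-run
    budget is exhausted; the counter i doubles as the re-run count."""
    i = 0
    changed = compare()                # initial old-vs-new comparison
    while len(changed) > 0 and i < num_reruns:
        i += 1
        measure_changed(changed, i)    # gather more samples for changed tests only
        changed = compare()            # re-compare using the merged results
    return changed                     # tests still flagged after the re-runs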
@@ -259,11 +257,11 @@ def report_results(title, old_lines, new_lines, threshold, output_file,
     return False


-def get_info_text():
+def get_info_text(threshold):
     text = """
 <details>
 <summary><strong>How to read the data</strong></summary>
-The tables contain differences in performance which are larger than 8 % and
+The tables contain differences in performance which are larger than {0} % and
 differences in code size which are larger than 1%.

 If you see any unexpected regressions, you should consider fixing the
@@ -279,7 +277,7 @@ performance team (@eeckstein).
 <details>
 <summary><strong>Hardware Overview</strong></summary>

-"""
+""".format(threshold)
     po = subprocess.check_output(['system_profiler', 'SPHardwareDataType'])
     for line in po.splitlines():
         selection = ['Model Name',
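For illustration, a hedged sketch of what the get_info_text(threshold) change amounts to: the report boilerplate becomes a template, and the configured threshold is substituted for the previously hard-coded "8 %". Only a small fragment of the real report text is reproduced here.

def get_info_text(threshold):
    # Sketch only: the real function returns a much longer report template.
    text = """
The tables contain differences in performance which are larger than {0} % and
differences in code size which are larger than 1%.
""".format(threshold)
    return text

print(get_info_text(5.0))  # "... larger than 5.0 % and ..."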