 from math import sqrt


-class PerformanceTestResult:
+class PerformanceTestResult(object):
+    """PerformanceTestResult holds results from executing an individual
+    benchmark from the Swift Benchmark Suite as reported by the test driver
+    (Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).
+
+    It depends on the log format emitted by the test driver in the form:
+    #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
+
+    The last column, MAX_RSS, is emitted only for runs instrumented by the
+    Benchmark_Driver to measure rough memory use during the execution of the
+    benchmark.
+    """
     def __init__(self, csv_row):
+        """A PerformanceTestResult instance is created from an iterable of
+        length 8 or 9, like a row provided by the CSV parser.
+        """
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]          # Name of the performance test
         self.samples = int(csv_row[2])  # Number of measurement samples taken
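
For illustration, here is how one line of driver output in the format documented above maps onto a result (the benchmark name and all values are invented):

    # A hypothetical log row, split the way csv.reader would split it:
    csv_row = '3,Array2D,20,335,368,350,9,346,20979712'.split(',')
    result = PerformanceTestResult(csv_row)
    print('{0} {1} {2}'.format(result.name, result.samples, result.max_rss))
    # Array2D 20 20979712
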
@@ -36,25 +50,41 @@ def __init__(self, csv_row):
         self.median = int(csv_row[7])   # Median runtime (ms)
         self.max_rss = (                # Maximum Resident Set Size (B)
             int(csv_row[8]) if len(csv_row) > 8 else None)
-        # TODO if we really want to compute mean MAX_RSS: self.S_memory
+
+    def __repr__(self):
+        return (
+            '<PerformanceTestResult name:{0.name!r} '
+            'samples:{0.samples!r} min:{0.min!r} max:{0.max!r} '
+            'mean:{0.mean!r} sd:{0.sd!r} median:{0.median!r}>'.format(self))

     @property
-    def sd(self):  # Standard Deviation (ms)
+    def sd(self):
+        """Standard Deviation (ms)"""
         return (0 if self.samples < 2 else
                 sqrt(self.S_runtime / (self.samples - 1)))

-    # Compute running variance, B. P. Welford's method
-    # See Knuth TAOCP vol 2, 3rd edition, page 232, or
-    # https://www.johndcook.com/blog/standard_deviation/
-    # M is mean, Standard Deviation is defined as sqrt(S/k-1)
     @staticmethod
     def running_mean_variance((k, M_, S_), x):
+        """
+        Compute running variance using B. P. Welford's method.
+        See Knuth TAOCP vol 2, 3rd edition, page 232, or
+        https://www.johndcook.com/blog/standard_deviation/
+        M is the mean; Standard Deviation is defined as sqrt(S/(k-1)).
+        """
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
         return (k, M, S)

     def merge(self, r):
+        """Merging test results recomputes min and max.
+        It attempts to recompute mean and standard deviation when all
+        samples are available. There is no correct way to compute these
+        values from test results that are summaries of more than 3 samples.
+
+        The use case here is comparing test results parsed from concatenated
+        log files from multiple runs of the benchmark driver.
+        """
         self.min = min(self.min, r.min)
         self.max = max(self.max, r.max)
         # self.median = None # unclear what to do here
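
As a sanity check of the Welford recurrence above, here is a minimal standalone sketch (written as a plain function, avoiding the Python 2 tuple-parameter signature) compared against the direct formula:

    from math import sqrt

    def running_mean_variance(state, x):
        k, M_, S_ = state
        k = float(k + 1)
        M = M_ + (x - M_) / k        # updated running mean
        S = S_ + (x - M_) * (x - M)  # updated sum of squared deviations
        return (k, M, S)

    state = (0, 0.0, 0.0)
    for x in [335, 368, 350]:
        state = running_mean_variance(state, x)
    k, M, S = state
    print(M)                  # 351.0, same as sum([335, 368, 350]) / 3.0
    print(sqrt(S / (k - 1)))  # 16.52..., the sample standard deviation
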
@@ -65,23 +95,31 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state

         # Merging test results with up to 3 samples is exact
-        # TODO investigate how to best handle merge of higher sample counts
-        values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
+        values = [r.min, r.max, r.median][:min(r.samples, 3)]
         map(push, values)

+    # Column labels for header row in results table
     header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')

-    # Tuple of values formatted for display in results table:
-    # (name, min value, max value, mean value, max_rss)
     def values(self):
-        return (self.name, str(self.min), str(self.max), str(int(self.mean)),
-                str(self.max_rss) if self.max_rss else '-')
-
-
-class ResultComparison:
+        """Values formatted for display in the results table,
+        in the format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
+        """
+        return (
+            self.name,
+            str(self.min), str(self.max), str(int(self.mean)),
+            str(self.max_rss) if self.max_rss else '—'
+        )
+
+
+class ResultComparison(object):
+    """ResultComparison compares MINs from new and old PerformanceTestResult.
+    It computes the speedup ratio and the improvement delta (%).
+    """
     def __init__(self, old, new):
         self.old = old
         self.new = new
+        assert(old.name == new.name)
         self.name = old.name  # Test name, convenience accessor

         # Speedup ratio
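
A quick sketch of `merge` on two results for the same benchmark, as parsed from concatenated logs (row values invented; the script is Python 2, where `map` runs the Welford pushes eagerly). Since each result summarizes exactly 3 samples, its min, max and median are those samples, so the merged mean is recovered exactly:

    a = PerformanceTestResult('1,AngryPhonebook,3,335,368,351,17,350'.split(','))
    b = PerformanceTestResult('1,AngryPhonebook,3,340,360,350,10,350'.split(','))
    a.merge(b)
    print('{0} {1}'.format(a.min, a.max))  # 335 368, extremes cover both runs
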
@@ -91,27 +129,43 @@ def __init__(self, old, new):
         ratio = (new.min + 0.001) / (old.min + 0.001)
         self.delta = ((ratio - 1) * 100)

-        self.is_dubious = (  # FIXME this is legacy
+        # Add ' (?)' to the speedup column as an indication of dubious
+        # changes: the result's MIN falls inside the (MIN, MAX) interval
+        # of the result it is being compared with.
+        self.is_dubious = (
             ' (?)' if ((old.min < new.min and new.min < old.max) or
                        (new.min < old.min and old.min < new.max))
             else '')

+    # Column labels for header row in results table
     header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')

-    # Tuple of values formatted for display in results table:
-    # (name, old value, new value, delta [%], speedup ratio)
     def values(self):
-        return (self.name, str(self.old.min), str(self.new.min),
+        """Values formatted for display in the results table,
+        in the format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
+        """
+        return (self.name,
+                str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
                 '{0:.2f}x{1}'.format(self.ratio, self.is_dubious))


-class TestComparator:
-    def __init__(self, old_file, new_file, delta_threshold, changes_only):
+class TestComparator(object):
+    """TestComparator parses `PerformanceTestResult`s from CSV log files.
+    It determines which tests were `added`, `removed` and which can be
+    compared. It then splits the `ResultComparison`s into 3 groups by the
+    change in performance relative to the `delta_threshold`: `increased`,
+    `decreased` and `unchanged`.
+
+    The lists of `added`, `removed` and `unchanged` tests are sorted
+    alphabetically. The `increased` and `decreased` lists are sorted in
+    descending order by the amount of change.
+    """
+    def __init__(self, old_file, new_file, delta_threshold):

         def load_from_CSV(filename):   # handles output from Benchmark_O and
             def skip_totals(row):      # Benchmark_Driver (added MAX_RSS column)
-                return len(row) > 7 and row[0].isdigit()
+                return len(row) > 7 and row[0].isdigit()
             tests = map(PerformanceTestResult,
                         filter(skip_totals, csv.reader(open(filename))))

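
A worked example of the delta computation and the dubious-change interval test shown above (numbers invented; the 0.001 terms guard against division by zero for zero-runtime results):

    old_min, old_max = 100, 105
    new_min, new_max = 80, 90

    ratio = (new_min + 0.001) / (old_min + 0.001)
    delta = (ratio - 1) * 100
    print('{0:+.1f}%'.format(delta))  # -20.0%

    # Dubious only if either MIN falls inside the other's (MIN, MAX) range:
    dubious = ((old_min < new_min < old_max) or
               (new_min < old_min < new_max))
    print(dubious)  # False, the two ranges do not even overlap
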
@@ -131,9 +185,9 @@ def add_or_merge(names, r):
         added_tests = new_tests.difference(old_tests)
         removed_tests = old_tests.difference(new_tests)

-        self.added = sorted(map(lambda t: new_results[t], added_tests),
+        self.added = sorted([new_results[t] for t in added_tests],
                             key=lambda r: r.name)
-        self.removed = sorted(map(lambda t: old_results[t], removed_tests),
+        self.removed = sorted([old_results[t] for t in removed_tests],
                               key=lambda r: r.name)

         def compare(name):
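
The added/removed bookkeeping is plain set difference over test names; for instance (names invented):

    old_tests = {'Ackermann', 'Array2D', 'Phonebook'}
    new_tests = {'Ackermann', 'Array2D', 'WordSplit'}
    print(sorted(new_tests.difference(old_tests)))  # ['WordSplit'] is added
    print(sorted(old_tests.difference(new_tests)))  # ['Phonebook'] is removed
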
@@ -144,24 +198,28 @@ def compare(name):
         def partition(l, p):
             return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))

-        # TODO take standard deviation (SD) into account
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold))
         increased, unchanged = partition(
             not_decreased, lambda c: c.ratio > (1 + delta_threshold))

         # sorted partitions
-        names = map(lambda c: c.name, comparisons)
+        names = [c.name for c in comparisons]
         comparisons = dict(zip(names, comparisons))
-        self.decreased = map(lambda c: comparisons[c.name],
-                             sorted(decreased, key=lambda c: -c.delta))
-        self.increased = map(lambda c: comparisons[c.name],
-                             sorted(increased, key=lambda c: c.delta))
-        self.unchanged = map(lambda c: comparisons[c.name],
-                             sorted(unchanged, key=lambda c: c.name))
-
-
-class ReportFormatter:
+        self.decreased = [comparisons[c.name]
+                          for c in sorted(decreased, key=lambda c: -c.delta)]
+        self.increased = [comparisons[c.name]
+                          for c in sorted(increased, key=lambda c: c.delta)]
+        self.unchanged = [comparisons[c.name]
+                          for c in sorted(unchanged, key=lambda c: c.name)]
+
+
+class ReportFormatter(object):
+    """ReportFormatter formats the `PerformanceTestResult`s and
+    `ResultComparison`s provided by `TestComparator` using their `header`
+    and `values()` into a report table. Supported formats are: `markdown`
+    (used for displaying benchmark results on GitHub), `git` and `html`.
+    """
     def __init__(self, comparator, old_branch, new_branch, changes_only):
         self.comparator = comparator
         self.old_branch = old_branch
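
The `partition` helper is a dense fold: `p(y)` selects which of the two accumulator lists receives `y`, and `.append(y) or x` returns the accumulator because `append` returns None. A standalone sketch (with the explicit `functools` import that Python 3 would require):

    from functools import reduce

    def partition(l, p):
        return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))

    fast, slow = partition([0.8, 1.2, 0.9, 1.5], lambda r: r < 1.0)
    print(fast)  # [0.8, 0.9], elements where the predicate holds
    print(slow)  # [1.2, 1.5], everything else
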
@@ -178,38 +236,39 @@ def __init__(self, comparator, old_branch, new_branch, changes_only):
 {0} ({1}): {2}"""

     def markdown(self):
-        return self.__formatted_text(
+        return self._formatted_text(
             ROW='{0} | {1} | {2} | {3} | {4} \n',
             HEADER_SEPARATOR='---',
             DETAIL=self.MARKDOWN_DETAIL)

     def git(self):
-        return self.__formatted_text(
+        return self._formatted_text(
             ROW='{0}   {1}   {2}   {3}   {4} \n',
             HEADER_SEPARATOR='   ',
             DETAIL=self.GIT_DETAIL)

-    def __column_widths(self):
+    def _column_widths(self):
         changed = self.comparator.decreased + self.comparator.increased
         comparisons = (changed if self.changes_only else
                        changed + self.comparator.unchanged)
         comparisons += self.comparator.added + self.comparator.removed

-        values = map(lambda c: c.values(), comparisons)
-        widths = map(lambda columns: map(len, columns),
-                     [PerformanceTestResult.header, ResultComparison.header] +
-                     values)
+        widths = [
+            map(len, columns) for columns in
+            [PerformanceTestResult.header, ResultComparison.header] +
+            [c.values() for c in comparisons]
+        ]

         def max_widths(maximum, widths):
             return tuple(map(max, zip(maximum, widths)))

         return reduce(max_widths, widths, tuple([0] * 5))

-    def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
-        widths = self.__column_widths()
+    def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
+        widths = self._column_widths()

         def justify_columns(contents):
-            return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents)))
+            return tuple([c.ljust(w) for w, c in zip(widths, contents)])

         def row(contents):
             return ROW.format(*justify_columns(contents))
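
Renaming `__column_widths` and `__formatted_text` to single-underscore names drops Python's name mangling (a double-underscore attribute is rewritten to `_ReportFormatter__column_widths`), which makes the helpers easier to reach from unit tests. The width computation itself folds per-row column widths into element-wise maxima; a standalone sketch with invented rows:

    from functools import reduce  # top-level reduce is Python 2 only

    header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')
    rows = [('Ackermann', '100', '80', '-20.0%', '0.80x'),
            ('Phonebook', '300', '302', '+0.7%', '1.01x (?)')]

    def max_widths(maximum, widths):
        return tuple(map(max, zip(maximum, widths)))

    widths = [tuple(len(c) for c in columns) for columns in [header] + rows]
    print(reduce(max_widths, widths, (0,) * 5))  # (9, 3, 3, 6, 9)
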
@@ -318,8 +377,8 @@ def table(title, results, speedup_color):
         ]))


-def main():
-
+def parse_args(args):
+    """Parse command line arguments and set default values."""
     parser = argparse.ArgumentParser(description='Compare Performance tests.')
     parser.add_argument('--old-file',
                         help='Baseline performance test suite (csv file)',
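
Taking the argument list as a parameter, instead of letting argparse read `sys.argv` implicitly, makes the parser testable in isolation. Together with the `type=float` conversion added below, a call like this returns a ready-to-use float (a sketch, assuming no other argument is declared as required):

    args = parse_args(['--old-file', 'old.csv', '--new-file', 'new.csv',
                       '--delta-threshold', '0.1'])
    print(args.delta_threshold)  # 0.1, already a float, no ad-hoc casts
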
@@ -339,42 +398,29 @@ def main():
     parser.add_argument('--old-branch',
                         help='Name of the old branch', default='OLD_MIN')
     parser.add_argument('--delta-threshold',
-                        help='Delta threshold. Default 0.05.', default='0.05')
+                        help='Delta threshold. Default 0.05.',
+                        type=float, default=0.05)
+    return parser.parse_args(args)
+

-    args = parser.parse_args()
+def main():
+    args = parse_args(sys.argv[1:])
     comparator = TestComparator(args.old_file, args.new_file,
-                                float(args.delta_threshold), args.changes_only)
+                                args.delta_threshold)
     formatter = ReportFormatter(comparator, args.old_branch, args.new_branch,
                                 args.changes_only)
-
-    if args.format:
-        if args.format.lower() != 'markdown':
-            print(formatter.git())
-        else:
-            print(formatter.markdown())
-
-    if args.format:
-        if args.format.lower() == 'html':
-            if args.output:
-                write_to_file(args.output, formatter.html())
-            else:
-                print('Error: missing --output flag.')
-                sys.exit(1)
-        elif args.format.lower() == 'markdown':
-            if args.output:
-                write_to_file(args.output, formatter.markdown())
-        elif args.format.lower() != 'git':
-            print('{0} is unknown format.'.format(args.format))
-            sys.exit(1)
-
-
-def write_to_file(file_name, data):
-    """
-    Write data to given file
-    """
-    file = open(file_name, 'w')
-    file.write(data)
-    file.close
+    formats = {
+        'markdown': formatter.markdown,
+        'git': formatter.git,
+        'html': formatter.html
+    }
+
+    report = formats[args.format]()
+    print(report)
+
+    if args.output:
+        with open(args.output, 'w') as f:
+            f.write(report)


 if __name__ == '__main__':
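
The dispatch table replaces the old chained format conditionals: the report format is now selected by a single dictionary lookup, and an unknown key surfaces immediately as a `KeyError`. The pattern in miniature:

    def markdown():
        return '| TEST | OLD | NEW |'

    def git():
        return 'TEST   OLD   NEW'

    formats = {'markdown': markdown, 'git': git}
    print(formats['git']())  # TEST   OLD   NEW
    # formats['tex']() would raise KeyError: 'tex'
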