@@ -16,6 +16,7 @@ from __future__ import print_function
16
16
17
17
import argparse
18
18
import json
19
+ import math
19
20
import os
20
21
import os .path
21
22
import shutil
@@ -162,51 +163,104 @@ def run_many(args):
162
163
return (rng , [run_once (args , ast , [r ]) for r in rng ])
163
164
164
165
166
def is_small(x):
    """Return True when the magnitude of *x* is below the 1e-9 tolerance.

    Used to treat values that are zero up to floating-point noise as zero.
    """
    tolerance = 1e-9
    magnitude = abs(x)
    return magnitude < tolerance
168
+
169
+
165
170
def linear_regression(x, y):
    """Fit ``y = slope * x + inter`` by ordinary least squares.

    By the book: https://en.wikipedia.org/wiki/Simple_linear_regression

    Args:
        x: sequence of input values.
        y: sequence of output values, the same length as ``x``.

    Returns:
        A ``(slope, inter, r_squared)`` tuple, where ``r_squared`` is the
        coefficient of determination (the square of the correlation
        coefficient) used to compare goodness-of-fit between models.

        Degenerate inputs are handled explicitly:
          * no data points: ``(0, 0, 1.0)``;
          * all inputs (nearly) identical: the slope is undefined, so a
            horizontal line through the mean of ``y`` is returned;
          * all outputs (nearly) identical: ``r_squared`` is 1.0;
          * inputs identical but outputs differing: ``r_squared`` is 0.0.
    """
    n = float(len(x))
    assert n == len(y)
    if n == 0:
        # Always return three values so callers can unpack unconditionally.
        return 0, 0, 1.0

    sum_x = sum(x)
    sum_y = sum(y)
    sum_prod = sum(a * b for a, b in zip(x, y))
    sum_x_sq = sum(a ** 2 for a in x)
    sum_y_sq = sum(b ** 2 for b in y)

    mean_x = sum_x / n
    mean_y = sum_y / n
    mean_prod = sum_prod / n
    mean_x_sq = sum_x_sq / n
    mean_y_sq = sum_y_sq / n

    covar_xy = mean_prod - mean_x * mean_y
    var_x = mean_x_sq - mean_x ** 2
    var_y = mean_y_sq - mean_y ** 2

    if is_small(var_x):
        # All of the inputs are the same, so the slope is undefined; dividing
        # by var_x would raise ZeroDivisionError or amplify rounding noise.
        slope = 0.0
    else:
        slope = covar_xy / var_x
    inter = mean_y - slope * mean_x

    # Compute r^2 to compare goodness-of-fit. No assertion is made about
    # covar_xy in the degenerate branches: it is only bounded by
    # sqrt(var_x * var_y), which can exceed the is_small() tolerance when
    # the other variance is large.
    if is_small(var_y):
        # all of the outputs are the same, so this is a perfect fit
        cor_coeff_sq = 1.0
    elif is_small(var_x):
        # all of the inputs are the same, and the outputs are different, so
        # this is a completely imperfect fit
        cor_coeff_sq = 0.0
    else:
        cor_coeff_sq = covar_xy ** 2 / (var_x * var_y)

    return slope, inter, cor_coeff_sq
206
+
207
+
208
# Power-law model Y = a * X^b; returns (b, a, R^2) -- the exponent comes first.
def fit_polynomial_model(x, y):
    """Fit ``Y = a * X**b`` and return ``(b, a, r_squared)``.

    Taking logarithms of both sides gives ``log(Y) = b*log(X) + log(a)``,
    which is fitted with linear_regression(); the intercept is then mapped
    back through exp() to recover ``a``. Every value in ``x`` and ``y`` is
    passed to math.log(), so all of them must be positive.
    """
    logs_of_x = list(map(math.log, x))
    logs_of_y = list(map(math.log, y))

    exponent, log_coeff, r_squared = linear_regression(logs_of_x, logs_of_y)
    coeff = math.exp(log_coeff)
    return exponent, coeff, r_squared
216
+
217
+
218
# Exponential model Y = a * b^X; returns (b, a, R^2) -- the base comes first.
def fit_exponential_model(x, y):
    """Fit ``Y = a * b**X`` and return ``(b, a, r_squared)``.

    Taking the logarithm of ``Y`` gives ``log(Y) = log(b)*X + log(a)``, which
    is fitted with linear_regression(); slope and intercept are then mapped
    back through exp() to recover ``b`` and ``a``. Every value in ``y`` is
    passed to math.log(), so all of them must be positive.
    """
    logs_of_y = list(map(math.log, y))

    fit = linear_regression(x, logs_of_y)
    log_base, log_coeff, r_squared = fit
    base = math.exp(log_base)
    coeff = math.exp(log_coeff)
    return base, coeff, r_squared
184
225
185
226
186
227
def report(args, rng, runs):
    """Print the best-fitting scaling law per key and flag bad scaling.

    For every key present in all of ``runs``, both a polynomial (n^b) and an
    exponential (b^n) model are fitted against ``rng``; whichever has the
    higher R^2 is reported.

    Args:
        args: parsed options; reads ``select``, ``quiet``, ``values``,
            ``polynomial_threshold`` and ``exponential_threshold``.
        rng: sequence of input sizes, one per run.
        runs: sequence of dicts mapping key -> measured value, one per run.

    Returns:
        True if no common keys were found or if any key scales at or beyond
        its model's threshold; False otherwise.
    """
    # set.intersection() needs at least one set, so guard the no-runs case.
    if runs:
        keys = set.intersection(*[set(j.keys()) for j in runs])
    else:
        keys = set()
    if not keys:
        print("No data found")
        if args.select:
            # This hint used to be a bare string expression and never showed.
            print("(perhaps try a different --select?)")
        return True

    rows = []
    for k in keys:
        vals = [r[k] for r in runs]
        # Clamp to 1 so that the logarithms taken by the fits are defined.
        bounded = [max(v, 1) for v in vals]
        p_b, _, p_r2 = fit_polynomial_model(rng, bounded)
        e_b, _, e_r2 = fit_exponential_model(rng, bounded)
        if p_r2 >= e_r2:
            # polynomial is best
            p_b = 0 if is_small(p_b) else p_b
            rows.append((False, p_b, k, vals))
        else:
            # exponential is best
            rows.append((True, e_b, k, vals))

    # Tuples sort on the is-exponential flag first, so exponential fits
    # always go after polynomial fits.
    rows.sort()

    bad = False
    for (is_exp, b, k, vals) in rows:
        # The polynomial exponent and the exponential base each have their
        # own threshold.
        if is_exp:
            this_is_bad = b >= args.exponential_threshold
            formatted = '%1.1f^n' % b
        else:
            this_is_bad = b >= args.polynomial_threshold
            formatted = 'n^%1.1f' % b

        if this_is_bad:
            bad = True
        if not args.quiet or this_is_bad:
            print("O(%s ) : %s" % (formatted, k))
            if args.values:
                print(" = ", vals)
    return bad
@@ -228,8 +282,13 @@ def main():
228
282
'--quiet' , action = 'store_true' ,
229
283
default = False , help = 'only print superlinear stats' )
230
284
parser .add_argument (
231
- '--threshold' , type = float ,
232
- default = 1.2 , help = 'exponent beyond which to consider "bad scaling"' )
285
+ '--polynomial-threshold' , type = float ,
286
+ default = 1.2 ,
287
+ help = 'minimum exponent for polynomial fit to consider "bad scaling"' )
288
+ parser .add_argument (
289
+ '--exponential-threshold' , type = float ,
290
+ default = 1.2 ,
291
+ help = 'minimum base for exponential fit to consider "bad scaling"' )
233
292
parser .add_argument (
234
293
'-typecheck' , '--typecheck' , action = 'store_true' ,
235
294
default = False , help = 'only run compiler with -typecheck' )
0 commit comments