@@ -28,6 +28,7 @@ import tempfile
28
28
from collections import namedtuple
29
29
from operator import attrgetter
30
30
import gyb
31
+ from jobstats import load_stats_dir , merge_all_jobstats
31
32
32
33
33
34
def find_which (p ):
@@ -63,12 +64,42 @@ def write_input_file(args, ast, d, n):
63
64
return fname
64
65
65
66
67
def ensure_tmpdir(d):
    """Ensure the parent tmpdir d exists and return a fresh private
    temporary directory created inside it.

    If d is None, the system default temporary location is used
    (tempfile.mkdtemp's default behavior).
    """
    if d is not None and not os.path.exists(d):
        # 0o700: private to the current user. The octal-prefix form is
        # valid in both Python 2.6+ and Python 3, unlike bare 0700
        # which is a syntax error under Python 3.
        os.makedirs(d, 0o700)
    return tempfile.mkdtemp(dir=d)
71
+
72
+
73
# In newer compilers, we can use -stats-output-dir and get both more
# counters, plus counters that are enabled in non-assert builds. Check
# to see if we have support for that.
def supports_stats_output_dir(args):
    """Probe whether args.swiftc_binary supports -stats-output-dir.

    Typechecks a trivial program with -stats-output-dir pointing at a
    scratch directory; returns True iff the compile succeeds and at
    least one stats file was produced there. The scratch directory is
    always cleaned up.
    """
    d = ensure_tmpdir(args.tmpdir)
    sd = os.path.join(d, "stats-probe")

    try:
        os.makedirs(sd, 0o700)
        # Write a trivial test program and try running with
        # -stats-output-dir
        testpath = os.path.join(sd, "test.swift")
        with open(testpath, 'w+') as f:
            f.write("print(1)\n")
        command = [args.swiftc_binary, '-frontend',
                   '-typecheck',
                   '-stats-output-dir', sd, testpath]
        subprocess.check_call(command)
        stats = load_stats_dir(sd)
        return len(stats) != 0
    except subprocess.CalledProcessError:
        return False
    finally:
        # Remove the whole mkdtemp parent, not just sd: d was created
        # solely for this probe and removing only sd leaked it.
        shutil.rmtree(d)
97
+
98
+
66
99
def run_once_with_primary (args , ast , rng , primary_idx ):
67
100
r = {}
68
101
try :
69
- if args .tmpdir is not None and not os .path .exists (args .tmpdir ):
70
- os .makedirs (args .tmpdir , 0700 )
71
- d = tempfile .mkdtemp (dir = args .tmpdir )
102
+ d = ensure_tmpdir (args .tmpdir )
72
103
inputs = [write_input_file (args , ast , d , i ) for i in rng ]
73
104
primary = inputs [primary_idx ]
74
105
ofile = "out.o"
@@ -122,17 +153,23 @@ def run_once_with_primary(args, ast, rng, primary_idx):
122
153
if args .debug :
123
154
command = ["lldb" , "--" ] + command
124
155
stats = "stats.json"
125
- argv = command + ["-Xllvm" , "-stats" ,
126
- "-Xllvm" , "-stats-json" ,
127
- "-Xllvm" , "-info-output-file=" + stats ]
156
+ if args .llvm_stat_reporter :
157
+ argv = command + ["-Xllvm" , "-stats" ,
158
+ "-Xllvm" , "-stats-json" ,
159
+ "-Xllvm" , "-info-output-file=" + stats ]
160
+ else :
161
+ argv = command + ["-stats-output-dir" , d ]
128
162
try :
129
163
subprocess .check_call (argv , cwd = d )
130
164
except subprocess .CalledProcessError as e :
131
165
if e .returncode != args .expected_exit_code :
132
166
raise
133
167
134
- with open (os .path .join (d , stats )) as f :
135
- r = json .load (f )
168
+ if args .llvm_stat_reporter :
169
+ with open (os .path .join (d , stats )) as f :
170
+ r = json .load (f )
171
+ else :
172
+ r = merge_all_jobstats (load_stats_dir (d )).stats
136
173
finally :
137
174
shutil .rmtree (d )
138
175
@@ -168,6 +205,17 @@ def run_many(args):
168
205
print ("" )
169
206
exit (1 )
170
207
208
+ if not args .llvm_stat_reporter :
209
+ if not supports_stats_output_dir (args ):
210
+ print ("**************************************************" )
211
+ print ("" )
212
+ print ("unable to use new-style -stats-output-dir reporting," )
213
+ print ("falling back to old-style -Xllvm -stats-json reporting" )
214
+ print ("(run with --llvm-stat-reporter to silence this warning)" )
215
+ print ("" )
216
+ print ("**************************************************" )
217
+ args .llvm_stat_reporter = True
218
+
171
219
ast = gyb .parse_template (args .file .name , args .file .read ())
172
220
rng = range (args .begin , args .end , args .step )
173
221
if args .step > (args .end - args .begin ):
@@ -334,22 +382,62 @@ def fit_function_to_data_by_least_squares(objective, params, bounds, xs, ys):
334
382
raise ValueError ("Nelder-Mead failed %d retries" % retries )
335
383
336
384
385
# Fit a 2-parameter linear model f(x) = const + coeff * x to a set
# of data (lists of xs and ys). Returns (coeff, const, fit).
def fit_linear_model(xs, ys):
    """Simple least-squares fit of ys = const + coeff * xs.

    Returns (coeff, const, fit) where fit is the squared correlation
    coefficient r^2 (1.0 == perfect fit, 0.0 == no fit at all).
    """
    # By the book: https://en.wikipedia.org/wiki/Simple_linear_regression
    n = float(len(xs))  # float so the /n divisions below are true division
    assert n == len(ys)
    if n == 0:
        return 0, 0, 1.0

    # Don't bother with anything fancy if function is constant.
    if all(y == ys[0] for y in ys):
        return (0.0, ys[0], 1.0)

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_prod = sum(a * b for a, b in zip(xs, ys))
    sum_x_sq = sum(a ** 2 for a in xs)
    sum_y_sq = sum(b ** 2 for b in ys)
    mean_x = sum_x / n
    mean_y = sum_y / n
    mean_prod = sum_prod / n
    mean_x_sq = sum_x_sq / n
    mean_y_sq = sum_y_sq / n
    covar_xy = mean_prod - mean_x * mean_y
    var_x = mean_x_sq - mean_x ** 2
    var_y = mean_y_sq - mean_y ** 2

    # Guard the degenerate case BEFORE dividing: if all inputs are
    # (nearly) identical the slope is undefined. Previously the
    # unconditional covar_xy / var_x could raise ZeroDivisionError,
    # making the var_x check below unreachable for exactly-equal xs.
    if is_somewhat_small(var_x):
        # all of the inputs are the same, and the outputs are different,
        # so this is a completely imperfect fit
        return 0.0, mean_y, 0.0

    slope = covar_xy / var_x
    inter = mean_y - slope * mean_x

    # Compute the correlation coefficient aka r^2, to compare goodness-of-fit.
    if is_somewhat_small(var_y):
        # all of the outputs are the same, so this is a perfect fit
        assert is_somewhat_small(covar_xy)
        cor_coeff_sq = 1.0
    else:
        cor_coeff_sq = covar_xy ** 2 / (var_x * var_y)

    return slope, inter, cor_coeff_sq
428
+
429
+
337
430
# Fit a 3-parameter polynomial model f(x) = const + coeff * x^exp to a set
338
431
# of data (lists of xs and ys). Returns (exp, coeff, fit).
339
432
def fit_polynomial_model (xs , ys ):
340
433
341
- # Don't bother running a simplex around a flat landscape if the input is
342
- # constant.
343
- if all (y == ys [0 ] for y in ys ):
344
- return (0.0 , 0.0 , 1.0 )
345
-
346
434
PolynomialParams = namedtuple ('PolynomialParams' ,
347
435
['const' , 'coeff' , 'exp' ])
348
436
params = PolynomialParams (const = 0.0 , coeff = 1.0 , exp = 1.0 )
349
437
mag = max (abs (y ) for y in ys )
350
438
bounds = PolynomialParams (const = (0 , mag ),
351
439
coeff = (0 , mag ),
352
- exp = (0.25 , 3 .0 ))
440
+ exp = (0.25 , 8 .0 ))
353
441
354
442
def objective (params , x ):
355
443
return params .const + params .coeff * (x ** params .exp )
@@ -391,19 +479,29 @@ def self_test():
391
479
392
480
class Tests (unittest .TestCase ):
393
481
482
+ def check_linearfit (self , xs , ys , lin , fit = 1.0 ):
483
+ (m , _ , f ) = fit_linear_model (xs , ys )
484
+ print ("linearfit(xs, ys, lin=%f, fit=%f) = (%f, %f)" %
485
+ (lin , fit , m , f ))
486
+ self .assertAlmostEqual (m , lin , places = 1 )
487
+ self .assertAlmostEqual (f , fit , places = 1 )
488
+ return f
489
+
394
490
def check_polyfit (self , xs , ys , exp , fit = 1.0 ):
395
491
(e , _ , f ) = fit_polynomial_model (xs , ys )
396
492
print ("polyfit(xs, ys, exp=%f, fit=%f) = (%f, %f)" %
397
493
(exp , fit , e , f ))
398
- self .assertAlmostEqual (e , exp , places = 0 )
399
- self .assertAlmostEqual (f , fit , places = 0 )
494
+ self .assertAlmostEqual (e , exp , places = 1 )
495
+ self .assertAlmostEqual (f , fit , places = 1 )
496
+ return f
400
497
401
498
def check_expfit (self , xs , ys , base , fit = 1.0 ):
402
499
(b , _ , f ) = fit_exponential_model (xs , ys )
403
500
print ("expfit(xs, ys, base=%f, fit=%f) = (%f, %f)" %
404
501
(base , fit , b , f ))
405
- self .assertAlmostEqual (b , base , places = 0 )
406
- self .assertAlmostEqual (f , fit , places = 0 )
502
+ self .assertAlmostEqual (b , base , places = 1 )
503
+ self .assertAlmostEqual (f , fit , places = 1 )
504
+ return f
407
505
408
506
def test_tuples (self ):
409
507
self .assertEqual (tup_distance ((1 , 0 , 0 ), (0 , 0 , 0 )), 1.0 )
@@ -436,6 +534,47 @@ def self_test():
436
534
self .check_polyfit ([5 , 10 , 15 ],
437
535
[307 , 632 , 957 ], 1 )
438
536
537
+ # "Basically linear", with a little nonlinearity in the first
538
+ # point. Polynomial-fit fails here because the simplex algorithm
539
+ # keeps trying to account for the first point by admitting a
540
+ # nonzero nonlinear term, thus bending the whole line instead of
541
+ # focusing on the linear and constant terms. So we run an
542
+ # independent fit on a "strictly linear" model too.
543
+ def test_eventually_linear (self ):
544
+ self .check_linearfit ([1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ],
545
+ [15 , 20 , 30 , 40 , 50 , 60 , 70 , 80 ],
546
+ 9.6 )
547
+
548
+ # Double check that linear-fit (which "always fits") isn't
549
+ # preferred over good nonlinear fits.
550
+ def test_linear_model_of_poly (self ):
551
+ xs = [10 , 20 , 30 , 40 , 50 , 60 ]
552
+ ys = [100 , 400 , 900 , 1600 , 2500 , 3600 ]
553
+ lf = self .check_linearfit (xs , ys , 70 )
554
+ pf = self .check_polyfit (xs , ys , 2 )
555
+ self .assertGreater (pf , lf )
556
+
557
+ def test_linear_model_of_poly_2 (self ):
558
+ xs = [10 , 20 , 30 , 40 , 50 , 60 ]
559
+ ys = [1000 , 8000 , 27000 , 64000 , 125000 , 216000 ]
560
+ lf = self .check_linearfit (xs , ys , 4180 , 0.87 )
561
+ pf = self .check_polyfit (xs , ys , 3 )
562
+ self .assertGreater (pf , lf )
563
+
564
+ def test_linear_model_of_poly_3 (self ):
565
+ xs = [1 , 2 , 3 , 4 , 5 ]
566
+ ys = [1.0 , 2.3 , 3.74 , 5.28 , 6.9 ]
567
+ lf = self .check_linearfit (xs , ys , 1.47 )
568
+ pf = self .check_polyfit (xs , ys , 1.2 )
569
+ self .assertGreater (pf , lf )
570
+
571
+ def test_linear_model_of_poly_offset (self ):
572
+ xs = [10 , 20 , 30 , 40 , 50 , 60 ]
573
+ ys = [1100 , 1400 , 1900 , 2600 , 3500 , 4600 ]
574
+ lf = self .check_linearfit (xs , ys , 70 )
575
+ pf = self .check_polyfit (xs , ys , 2 )
576
+ self .assertGreater (pf , lf )
577
+
439
578
def test_linear_offset (self ):
440
579
self .check_polyfit ([1 , 2 , 3 , 4 , 5 , 6 ],
441
580
[1000 + i for i in range (1 , 7 )], 1 )
@@ -491,25 +630,38 @@ def report(args, rng, runs):
491
630
vals = [r [k ] for r in runs ]
492
631
bounded = [max (v , 1 ) for v in vals ]
493
632
one_fit = False
633
+ perfect_fit = False
634
+ fit_r2_thresh = 0.99
635
+ lin_b , lin_a , lin_r2 = fit_linear_model (rng , bounded )
636
+ if lin_r2 > fit_r2_thresh :
637
+ one_fit = True
638
+ if lin_r2 == 1.0 :
639
+ perfect_fit = True
494
640
p_b , p_a , p_r2 = (1.0 , 1.0 , 0.0 )
495
641
e_b , e_a , e_r2 = (1.0 , 1.0 , 0.0 )
496
642
try :
497
- p_b , p_a , p_r2 = fit_polynomial_model (rng , bounded )
498
- if p_r2 > 0.9 :
499
- one_fit = True
643
+ if not perfect_fit :
644
+ p_b , p_a , p_r2 = fit_polynomial_model (rng , bounded )
645
+ if p_r2 > fit_r2_thresh :
646
+ one_fit = True
647
+ if p_r2 == 1.0 :
648
+ perfect_fit = True
500
649
except ValueError :
501
650
pass
502
651
try :
503
- e_b , e_a , e_r2 = fit_exponential_model (rng , bounded )
504
- if e_r2 > 0.9 :
505
- one_fit = True
652
+ if not perfect_fit :
653
+ e_b , e_a , e_r2 = fit_exponential_model (rng , bounded )
654
+ if e_r2 > fit_r2_thresh :
655
+ one_fit = True
506
656
except ValueError :
507
657
pass
508
658
if not one_fit :
509
- print ("failed to fit either polynomial or exponential model to " +
510
- repr (vals ))
659
+ print ("failed to fit model to " + repr (vals ))
511
660
return True
512
- if p_r2 >= e_r2 :
661
+ if lin_r2 >= e_r2 and lin_r2 >= p_r2 :
662
+ # strict-linear is best
663
+ rows .append ((False , 0.0 if lin_b == 0 else 1.0 , k , vals ))
664
+ elif p_r2 >= e_r2 :
513
665
# polynomial is best
514
666
rows .append ((False , p_b , k , vals ))
515
667
else :
@@ -598,6 +750,9 @@ def main():
598
750
parser .add_argument (
599
751
'--debug' , action = 'store_true' ,
600
752
default = False , help = 'invoke lldb on each scale test' )
753
+ parser .add_argument (
754
+ '--llvm-stat-reporter' , action = 'store_true' ,
755
+ default = False , help = 'only collect stats via old-style LLVM reporter' )
601
756
parser .add_argument (
602
757
'--self-test' , action = 'store_true' ,
603
758
default = False , help = 'run arithmetic unit-tests of scale-test itself' )
0 commit comments