Skip to content

Commit 33b673e

Browse files
authored
Merge pull request #11892 from graydon/scale-test-improvements
2 parents 7dc2c70 + 2c281c7 commit 33b673e

File tree

2 files changed

+183
-28
lines changed

2 files changed

+183
-28
lines changed

utils/jobstats/jobstats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def load_stats_dir(path, select_module=[]):
163163
"""Loads all stats-files found in path into a list of JobStats objects"""
164164
jobstats = []
165165
auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
166-
r"-(?P<out>[^-]+)-(?P<opt>[^-]+)")
166+
r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
167167
fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
168168
auxpat +
169169
r"-(?P<pid>\d+)(-.*)?.json$")

utils/scale-test

Lines changed: 182 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import tempfile
2828
from collections import namedtuple
2929
from operator import attrgetter
3030
import gyb
31+
from jobstats import load_stats_dir, merge_all_jobstats
3132

3233

3334
def find_which(p):
@@ -63,12 +64,42 @@ def write_input_file(args, ast, d, n):
6364
return fname
6465

6566

67+
def ensure_tmpdir(d):
    """Create and return a fresh, uniquely named temporary directory.

    d -- parent directory for the new temporary directory, or None to let
         tempfile.mkdtemp choose its default location.  If d is given but
         does not exist yet, it is created first via os.makedirs.

    Returns the path of the newly created directory.  Nothing here removes
    it again; cleanup is left to the caller.
    """
    if d is not None and not os.path.exists(d):
        # Spell the mode with the "0o" prefix: a bare 0700 only parses on
        # Python 2, while 0o700 is accepted by Python 2.6+ and Python 3.
        os.makedirs(d, 0o700)
    return tempfile.mkdtemp(dir=d)
71+
72+
73+
# In newer compilers, we can use -stats-output-dir and get more counters,
74+
# as well as counters that are enabled in non-assert builds. Check
75+
# to see if we have support for that.
76+
def supports_stats_output_dir(args):
    """Probe whether the compiler accepts -stats-output-dir.

    Writes a one-line Swift program into a scratch directory, runs
    args.swiftc_binary on it in '-frontend -typecheck' mode with
    -stats-output-dir pointing at that directory, and then asks
    load_stats_dir what it can find there.

    args -- parsed command-line options; args.tmpdir and
            args.swiftc_binary are read.

    Returns True if at least one stats entry was loaded, False if none
    were or if the compiler exited with a nonzero status.  Other errors
    (for example OSError from a missing binary) propagate to the caller.
    """
    # ensure_tmpdir returns a brand-new mkdtemp directory on every call,
    # so it must be removed here or every probe leaks one directory.
    d = ensure_tmpdir(args.tmpdir)
    try:
        sd = os.path.join(d, "stats-probe")
        # "0o" prefix so the literal parses on Python 2.6+ and Python 3.
        os.makedirs(sd, 0o700)
        # Write a trivial test program and try running with
        # -stats-output-dir
        testpath = os.path.join(sd, "test.swift")
        with open(testpath, 'w+') as f:
            f.write("print(1)\n")
        command = [args.swiftc_binary, '-frontend',
                   '-typecheck',
                   '-stats-output-dir', sd, testpath]
        subprocess.check_call(command)
        stats = load_stats_dir(sd)
        return len(stats) != 0
    except subprocess.CalledProcessError:
        return False
    finally:
        # sd lives inside d, so removing d cleans up both; d always exists
        # at this point, so cleanup cannot mask an earlier failure.
        shutil.rmtree(d)
97+
98+
6699
def run_once_with_primary(args, ast, rng, primary_idx):
67100
r = {}
68101
try:
69-
if args.tmpdir is not None and not os.path.exists(args.tmpdir):
70-
os.makedirs(args.tmpdir, 0700)
71-
d = tempfile.mkdtemp(dir=args.tmpdir)
102+
d = ensure_tmpdir(args.tmpdir)
72103
inputs = [write_input_file(args, ast, d, i) for i in rng]
73104
primary = inputs[primary_idx]
74105
ofile = "out.o"
@@ -122,17 +153,23 @@ def run_once_with_primary(args, ast, rng, primary_idx):
122153
if args.debug:
123154
command = ["lldb", "--"] + command
124155
stats = "stats.json"
125-
argv = command + ["-Xllvm", "-stats",
126-
"-Xllvm", "-stats-json",
127-
"-Xllvm", "-info-output-file=" + stats]
156+
if args.llvm_stat_reporter:
157+
argv = command + ["-Xllvm", "-stats",
158+
"-Xllvm", "-stats-json",
159+
"-Xllvm", "-info-output-file=" + stats]
160+
else:
161+
argv = command + ["-stats-output-dir", d]
128162
try:
129163
subprocess.check_call(argv, cwd=d)
130164
except subprocess.CalledProcessError as e:
131165
if e.returncode != args.expected_exit_code:
132166
raise
133167

134-
with open(os.path.join(d, stats)) as f:
135-
r = json.load(f)
168+
if args.llvm_stat_reporter:
169+
with open(os.path.join(d, stats)) as f:
170+
r = json.load(f)
171+
else:
172+
r = merge_all_jobstats(load_stats_dir(d)).stats
136173
finally:
137174
shutil.rmtree(d)
138175

@@ -168,6 +205,17 @@ def run_many(args):
168205
print("")
169206
exit(1)
170207

208+
if not args.llvm_stat_reporter:
209+
if not supports_stats_output_dir(args):
210+
print("**************************************************")
211+
print("")
212+
print("unable to use new-style -stats-output-dir reporting,")
213+
print("falling back to old-style -Xllvm -stats-json reporting")
214+
print("(run with --llvm-stat-reporter to silence this warning)")
215+
print("")
216+
print("**************************************************")
217+
args.llvm_stat_reporter = True
218+
171219
ast = gyb.parse_template(args.file.name, args.file.read())
172220
rng = range(args.begin, args.end, args.step)
173221
if args.step > (args.end - args.begin):
@@ -334,22 +382,62 @@ def fit_function_to_data_by_least_squares(objective, params, bounds, xs, ys):
334382
raise ValueError("Nelder-Mead failed %d retries" % retries)
335383

336384

385+
# Fit a 2-parameter linear model f(x) = const + coeff * x to a set
386+
# of data (lists of xs and ys). Returns (coeff, const, fit).
387+
def fit_linear_model(xs, ys):
    """Fit f(x) = const + coeff * x to the points (xs[i], ys[i]).

    This is textbook simple linear regression, see
    https://en.wikipedia.org/wiki/Simple_linear_regression

    xs, ys -- equal-length sequences of numbers.

    Returns a tuple (coeff, const, fit), where fit is r^2 (the squared
    correlation coefficient, used as a goodness-of-fit measure):
      * empty input yields (0, 0, 1.0);
      * constant ys yield (0.0, ys[0], 1.0);
      * identical xs with differing ys yield a slope of 0.0 and, provided
        is_somewhat_small accepts the resulting zero variance, a fit of
        0.0.
    """
    # n is a float so that every mean below uses true division, even on
    # Python 2 with integer inputs.
    n = float(len(xs))
    assert n == len(ys)
    if n == 0:
        return 0, 0, 1.0

    # Don't bother with anything fancy if function is constant.
    if all(y == ys[0] for y in ys):
        return (0.0, ys[0], 1.0)

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_prod = sum(a * b for a, b in zip(xs, ys))
    sum_x_sq = sum(a ** 2 for a in xs)
    sum_y_sq = sum(b ** 2 for b in ys)
    mean_x = sum_x / n
    mean_y = sum_y / n
    mean_prod = sum_prod / n
    mean_x_sq = sum_x_sq / n
    mean_y_sq = sum_y_sq / n
    covar_xy = mean_prod - mean_x * mean_y
    var_x = mean_x_sq - mean_x**2
    var_y = mean_y_sq - mean_y**2

    # If every x is identical, var_x is exactly zero and no slope is
    # defined.  Use a flat line through mean_y instead of dividing by
    # zero, so the "inputs are all the same" case below is reachable.
    slope = covar_xy / var_x if var_x != 0 else 0.0
    inter = mean_y - slope * mean_x

    # Compute the squared correlation coefficient, r^2 (the coefficient
    # of determination), to compare goodness-of-fit.
    if is_somewhat_small(var_y):
        # all of the outputs are the same, so this is a perfect fit
        assert is_somewhat_small(covar_xy)
        cor_coeff_sq = 1.0
    elif is_somewhat_small(var_x):
        # all of the inputs are the same, and the outputs are different, so
        # this is a completely imperfect fit
        assert is_somewhat_small(covar_xy)
        cor_coeff_sq = 0.0
    else:
        cor_coeff_sq = covar_xy**2 / (var_x * var_y)

    return slope, inter, cor_coeff_sq
428+
429+
337430
# Fit a 3-parameter polynomial model f(x) = const + coeff * x^exp to a set
338431
# of data (lists of xs and ys). Returns (exp, coeff, fit).
339432
def fit_polynomial_model(xs, ys):
340433

341-
# Don't bother running a simplex around a flat landscape if the input is
342-
# constant.
343-
if all(y == ys[0] for y in ys):
344-
return (0.0, 0.0, 1.0)
345-
346434
PolynomialParams = namedtuple('PolynomialParams',
347435
['const', 'coeff', 'exp'])
348436
params = PolynomialParams(const=0.0, coeff=1.0, exp=1.0)
349437
mag = max(abs(y) for y in ys)
350438
bounds = PolynomialParams(const=(0, mag),
351439
coeff=(0, mag),
352-
exp=(0.25, 3.0))
440+
exp=(0.25, 8.0))
353441

354442
def objective(params, x):
355443
return params.const + params.coeff * (x ** params.exp)
@@ -391,19 +479,29 @@ def self_test():
391479

392480
class Tests(unittest.TestCase):
393481

482+
def check_linearfit(self, xs, ys, lin, fit=1.0):
    """Fit a linear model to (xs, ys) and check it against expectations.

    Prints the expected and actual slope and fit, asserts that the slope
    matches lin and the fit matches fit (each to one decimal place), and
    returns the actual fit so callers can compare it with other models.
    """
    slope, _const, r2 = fit_linear_model(xs, ys)
    summary = ("linearfit(xs, ys, lin=%f, fit=%f) = (%f, %f)" %
               (lin, fit, slope, r2))
    print(summary)
    for actual, expected in ((slope, lin), (r2, fit)):
        self.assertAlmostEqual(actual, expected, places=1)
    return r2
489+
394490
def check_polyfit(self, xs, ys, exp, fit=1.0):
395491
(e, _, f) = fit_polynomial_model(xs, ys)
396492
print("polyfit(xs, ys, exp=%f, fit=%f) = (%f, %f)" %
397493
(exp, fit, e, f))
398-
self.assertAlmostEqual(e, exp, places=0)
399-
self.assertAlmostEqual(f, fit, places=0)
494+
self.assertAlmostEqual(e, exp, places=1)
495+
self.assertAlmostEqual(f, fit, places=1)
496+
return f
400497

401498
def check_expfit(self, xs, ys, base, fit=1.0):
402499
(b, _, f) = fit_exponential_model(xs, ys)
403500
print("expfit(xs, ys, base=%f, fit=%f) = (%f, %f)" %
404501
(base, fit, b, f))
405-
self.assertAlmostEqual(b, base, places=0)
406-
self.assertAlmostEqual(f, fit, places=0)
502+
self.assertAlmostEqual(b, base, places=1)
503+
self.assertAlmostEqual(f, fit, places=1)
504+
return f
407505

408506
def test_tuples(self):
409507
self.assertEqual(tup_distance((1, 0, 0), (0, 0, 0)), 1.0)
@@ -436,6 +534,47 @@ def self_test():
436534
self.check_polyfit([5, 10, 15],
437535
[307, 632, 957], 1)
438536

537+
# "Basically linear", with a little nonlinearity in the first
538+
# point. Polynomial-fit fails here because the simplex algorithm
539+
# keeps trying to account for the first point by admitting a
540+
# nonzero nonlinear term, thus bending the whole line instead of
541+
# focusing on the linear and constant terms. So we run an
542+
# independent fit on a "strictly linear" model too.
543+
def test_eventually_linear(self):
    # Only the first point (1, 15) is off the line; the strict linear
    # fit is still expected to report a slope of about 9.6.
    inputs = list(range(1, 9))
    outputs = [15, 20, 30, 40, 50, 60, 70, 80]
    self.check_linearfit(inputs, outputs, 9.6)
547+
548+
# Double check that linear-fit (which "always fits") isn't
549+
# preferred over good nonlinear fits.
550+
def test_linear_model_of_poly(self):
    # Purely quadratic data: ys is x**2 for each x.
    sizes = [10, 20, 30, 40, 50, 60]
    squares = [x ** 2 for x in sizes]
    linear_fit = self.check_linearfit(sizes, squares, 70)
    poly_fit = self.check_polyfit(sizes, squares, 2)
    # The polynomial model must score strictly better than the line.
    self.assertGreater(poly_fit, linear_fit)
556+
557+
def test_linear_model_of_poly_2(self):
    # Purely cubic data: ys is x**3 for each x.
    sizes = [10, 20, 30, 40, 50, 60]
    cubes = [x ** 3 for x in sizes]
    linear_fit = self.check_linearfit(sizes, cubes, 4180, 0.87)
    poly_fit = self.check_polyfit(sizes, cubes, 3)
    # The polynomial model must score strictly better than the line.
    self.assertGreater(poly_fit, linear_fit)
563+
564+
def test_linear_model_of_poly_3(self):
    # Mildly super-linear data; the polynomial fit is expected to find an
    # exponent of about 1.2.
    inputs = list(range(1, 6))
    outputs = [1.0, 2.3, 3.74, 5.28, 6.9]
    linear_fit = self.check_linearfit(inputs, outputs, 1.47)
    poly_fit = self.check_polyfit(inputs, outputs, 1.2)
    # The polynomial model must score strictly better than the line.
    self.assertGreater(poly_fit, linear_fit)
570+
571+
def test_linear_model_of_poly_offset(self):
    # Quadratic data shifted up by a constant: ys is 1000 + x**2.
    sizes = [10, 20, 30, 40, 50, 60]
    shifted_squares = [1000 + x ** 2 for x in sizes]
    linear_fit = self.check_linearfit(sizes, shifted_squares, 70)
    poly_fit = self.check_polyfit(sizes, shifted_squares, 2)
    # The polynomial model must score strictly better than the line.
    self.assertGreater(poly_fit, linear_fit)
577+
439578
def test_linear_offset(self):
440579
self.check_polyfit([1, 2, 3, 4, 5, 6],
441580
[1000 + i for i in range(1, 7)], 1)
@@ -491,25 +630,38 @@ def report(args, rng, runs):
491630
vals = [r[k] for r in runs]
492631
bounded = [max(v, 1) for v in vals]
493632
one_fit = False
633+
perfect_fit = False
634+
fit_r2_thresh = 0.99
635+
lin_b, lin_a, lin_r2 = fit_linear_model(rng, bounded)
636+
if lin_r2 > fit_r2_thresh:
637+
one_fit = True
638+
if lin_r2 == 1.0:
639+
perfect_fit = True
494640
p_b, p_a, p_r2 = (1.0, 1.0, 0.0)
495641
e_b, e_a, e_r2 = (1.0, 1.0, 0.0)
496642
try:
497-
p_b, p_a, p_r2 = fit_polynomial_model(rng, bounded)
498-
if p_r2 > 0.9:
499-
one_fit = True
643+
if not perfect_fit:
644+
p_b, p_a, p_r2 = fit_polynomial_model(rng, bounded)
645+
if p_r2 > fit_r2_thresh:
646+
one_fit = True
647+
if p_r2 == 1.0:
648+
perfect_fit = True
500649
except ValueError:
501650
pass
502651
try:
503-
e_b, e_a, e_r2 = fit_exponential_model(rng, bounded)
504-
if e_r2 > 0.9:
505-
one_fit = True
652+
if not perfect_fit:
653+
e_b, e_a, e_r2 = fit_exponential_model(rng, bounded)
654+
if e_r2 > fit_r2_thresh:
655+
one_fit = True
506656
except ValueError:
507657
pass
508658
if not one_fit:
509-
print("failed to fit either polynomial or exponential model to " +
510-
repr(vals))
659+
print("failed to fit model to " + repr(vals))
511660
return True
512-
if p_r2 >= e_r2:
661+
if lin_r2 >= e_r2 and lin_r2 >= p_r2:
662+
# strict-linear is best
663+
rows.append((False, 0.0 if lin_b == 0 else 1.0, k, vals))
664+
elif p_r2 >= e_r2:
513665
# polynomial is best
514666
rows.append((False, p_b, k, vals))
515667
else:
@@ -598,6 +750,9 @@ def main():
598750
parser.add_argument(
599751
'--debug', action='store_true',
600752
default=False, help='invoke lldb on each scale test')
753+
parser.add_argument(
754+
'--llvm-stat-reporter', action='store_true',
755+
default=False, help='only collect stats via old-style LLVM reporter')
601756
parser.add_argument(
602757
'--self-test', action='store_true',
603758
default=False, help='run arithmetic unit-tests of scale-test itself')

0 commit comments

Comments
 (0)