Merge pull request #11892 from graydon/scale-test-improvements

swift-ci · web-flow · commit 33b673e32ae0 · 2017-09-13T00:19:08.000-07:00
diff --git a/utils/jobstats/jobstats.py b/utils/jobstats/jobstats.py
@@ -163,7 +163,7 @@ def load_stats_dir(path, select_module=[]):
     """Loads all stats-files found in path into a list of JobStats objects"""
     jobstats = []
     auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
-              r"-(?P<out>[^-]+)-(?P<opt>[^-]+)")
+              r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
     fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
             auxpat +
             r"-(?P<pid>\d+)(-.*)?.json$")
diff --git a/utils/scale-test b/utils/scale-test
@@ -28,6 +28,7 @@ import tempfile
 from collections import namedtuple
 from operator import attrgetter
 import gyb
+from jobstats import load_stats_dir, merge_all_jobstats
 
 
 def find_which(p):
@@ -63,12 +64,42 @@ def write_input_file(args, ast, d, n):
     return fname
 
 
+def ensure_tmpdir(d):
+    if d is not None and not os.path.exists(d):
+        os.makedirs(d, 0700)
+    return tempfile.mkdtemp(dir=d)
+
+
+# In newer compilers, we can use -stats-output-dir to get both more
+# counters and counters that are enabled in non-assert builds. Check
+# to see if we have support for that.
+def supports_stats_output_dir(args):
+    d = ensure_tmpdir(args.tmpdir)
+    sd = os.path.join(d, "stats-probe")
+
+    try:
+        os.makedirs(sd, 0700)
+        # Write a trivial test program and try running with
+        # -stats-output-dir
+        testpath = os.path.join(sd, "test.swift")
+        with open(testpath, 'w+') as f:
+            f.write("print(1)\n")
+        command = [args.swiftc_binary, '-frontend',
+                   '-typecheck',
+                   '-stats-output-dir', sd, testpath]
+        subprocess.check_call(command)
+        stats = load_stats_dir(sd)
+        return len(stats) != 0
+    except subprocess.CalledProcessError:
+        return False
+    finally:
+        shutil.rmtree(sd)
+
+
 def run_once_with_primary(args, ast, rng, primary_idx):
     r = {}
     try:
-        if args.tmpdir is not None and not os.path.exists(args.tmpdir):
-            os.makedirs(args.tmpdir, 0700)
-        d = tempfile.mkdtemp(dir=args.tmpdir)
+        d = ensure_tmpdir(args.tmpdir)
         inputs = [write_input_file(args, ast, d, i) for i in rng]
         primary = inputs[primary_idx]
         ofile = "out.o"
@@ -122,17 +153,23 @@ def run_once_with_primary(args, ast, rng, primary_idx):
             if args.debug:
                 command = ["lldb", "--"] + command
             stats = "stats.json"
-            argv = command + ["-Xllvm", "-stats",
-                              "-Xllvm", "-stats-json",
-                              "-Xllvm", "-info-output-file=" + stats]
+            if args.llvm_stat_reporter:
+                argv = command + ["-Xllvm", "-stats",
+                                  "-Xllvm", "-stats-json",
+                                  "-Xllvm", "-info-output-file=" + stats]
+            else:
+                argv = command + ["-stats-output-dir", d]
             try:
                 subprocess.check_call(argv, cwd=d)
             except subprocess.CalledProcessError as e:
                 if e.returncode != args.expected_exit_code:
                     raise
 
-            with open(os.path.join(d, stats)) as f:
-                r = json.load(f)
+            if args.llvm_stat_reporter:
+                with open(os.path.join(d, stats)) as f:
+                    r = json.load(f)
+            else:
+                r = merge_all_jobstats(load_stats_dir(d)).stats
     finally:
         shutil.rmtree(d)
 
@@ -168,6 +205,17 @@ def run_many(args):
         print("")
         exit(1)
 
+    if not args.llvm_stat_reporter:
+        if not supports_stats_output_dir(args):
+            print("**************************************************")
+            print("")
+            print("unable to use new-style -stats-output-dir reporting,")
+            print("falling back to old-style -Xllvm -stats-json reporting")
+            print("(run with --llvm-stat-reporter to silence this warning)")
+            print("")
+            print("**************************************************")
+            args.llvm_stat_reporter = True
+
     ast = gyb.parse_template(args.file.name, args.file.read())
     rng = range(args.begin, args.end, args.step)
     if args.step > (args.end - args.begin):
@@ -334,22 +382,62 @@ def fit_function_to_data_by_least_squares(objective, params, bounds, xs, ys):
     raise ValueError("Nelder-Mead failed %d retries" % retries)
 
 
+# Fit a 2-parameter linear model f(x) = const + coeff * x to a set
+# of data (lists of xs and ys). Returns (coeff, const, fit).
+def fit_linear_model(xs, ys):
+    # By the book: https://en.wikipedia.org/wiki/Simple_linear_regression
+    n = float(len(xs))
+    assert n == len(ys)
+    if n == 0:
+        return 0, 0, 1.0
+
+    # Don't bother with anything fancy if function is constant.
+    if all(y == ys[0] for y in ys):
+        return (0.0, ys[0], 1.0)
+
+    sum_x = sum(xs)
+    sum_y = sum(ys)
+    sum_prod = sum(a * b for a, b in zip(xs, ys))
+    sum_x_sq = sum(a ** 2 for a in xs)
+    sum_y_sq = sum(b ** 2 for b in ys)
+    mean_x = sum_x / n
+    mean_y = sum_y / n
+    mean_prod = sum_prod / n
+    mean_x_sq = sum_x_sq / n
+    mean_y_sq = sum_y_sq / n
+    covar_xy = mean_prod - mean_x * mean_y
+    var_x = mean_x_sq - mean_x**2
+    var_y = mean_y_sq - mean_y**2
+    slope = covar_xy / var_x
+    inter = mean_y - slope * mean_x
+
+    # Compute the squared correlation coefficient (r^2) to compare fits.
+    if is_somewhat_small(var_y):
+        # all of the outputs are the same, so this is a perfect fit
+        assert is_somewhat_small(covar_xy)
+        cor_coeff_sq = 1.0
+    elif is_somewhat_small(var_x):
+        # all of the inputs are the same, and the outputs are different, so
+        # this is a completely imperfect fit
+        assert is_somewhat_small(covar_xy)
+        cor_coeff_sq = 0.0
+    else:
+        cor_coeff_sq = covar_xy**2 / (var_x * var_y)
+
+    return slope, inter, cor_coeff_sq
+
+
 # Fit a 3-parameter polynomial model f(x) = const + coeff * x^exp to a set
 # of data (lists of xs and ys). Returns (exp, coeff, fit).
 def fit_polynomial_model(xs, ys):
 
-    # Don't bother running a simplex around a flat landscape if the input is
-    # constant.
-    if all(y == ys[0] for y in ys):
-        return (0.0, 0.0, 1.0)
-
     PolynomialParams = namedtuple('PolynomialParams',
                                   ['const', 'coeff', 'exp'])
     params = PolynomialParams(const=0.0, coeff=1.0, exp=1.0)
     mag = max(abs(y) for y in ys)
     bounds = PolynomialParams(const=(0, mag),
                               coeff=(0, mag),
-                              exp=(0.25, 3.0))
+                              exp=(0.25, 8.0))
 
     def objective(params, x):
         return params.const + params.coeff * (x ** params.exp)
@@ -391,19 +479,29 @@ def self_test():
 
     class Tests(unittest.TestCase):
 
+        def check_linearfit(self, xs, ys, lin, fit=1.0):
+            (m, _, f) = fit_linear_model(xs, ys)
+            print("linearfit(xs, ys, lin=%f, fit=%f) = (%f, %f)" %
+                  (lin, fit, m, f))
+            self.assertAlmostEqual(m, lin, places=1)
+            self.assertAlmostEqual(f, fit, places=1)
+            return f
+
         def check_polyfit(self, xs, ys, exp, fit=1.0):
             (e, _, f) = fit_polynomial_model(xs, ys)
             print("polyfit(xs, ys, exp=%f, fit=%f) = (%f, %f)" %
                   (exp, fit, e, f))
-            self.assertAlmostEqual(e, exp, places=0)
-            self.assertAlmostEqual(f, fit, places=0)
+            self.assertAlmostEqual(e, exp, places=1)
+            self.assertAlmostEqual(f, fit, places=1)
+            return f
 
         def check_expfit(self, xs, ys, base, fit=1.0):
             (b, _, f) = fit_exponential_model(xs, ys)
             print("expfit(xs, ys, base=%f, fit=%f) = (%f, %f)" %
                   (base, fit, b, f))
-            self.assertAlmostEqual(b, base, places=0)
-            self.assertAlmostEqual(f, fit, places=0)
+            self.assertAlmostEqual(b, base, places=1)
+            self.assertAlmostEqual(f, fit, places=1)
+            return f
 
         def test_tuples(self):
             self.assertEqual(tup_distance((1, 0, 0), (0, 0, 0)), 1.0)
@@ -436,6 +534,47 @@ def self_test():
             self.check_polyfit([5, 10, 15],
                                [307, 632, 957], 1)
 
+        # "Basically linear", with a little nonlinearity in the first
+        # point. Polynomial-fit fails here because the simplex algorithm
+        # keeps trying to account for the first point by admitting a
+        # nonzero nonlinear term, thus bending the whole line instead of
+        # focusing on the linear and constant terms. So we run an
+        # independent fit on a "strictly linear" model too.
+        def test_eventually_linear(self):
+            self.check_linearfit([1, 2, 3, 4, 5, 6, 7, 8],
+                                 [15, 20, 30, 40, 50, 60, 70, 80],
+                                 9.6)
+
+        # Double check that linear-fit (which "always fits") isn't
+        # preferred over good nonlinear fits.
+        def test_linear_model_of_poly(self):
+            xs = [10, 20, 30, 40, 50, 60]
+            ys = [100, 400, 900, 1600, 2500, 3600]
+            lf = self.check_linearfit(xs, ys, 70)
+            pf = self.check_polyfit(xs, ys, 2)
+            self.assertGreater(pf, lf)
+
+        def test_linear_model_of_poly_2(self):
+            xs = [10, 20, 30, 40, 50, 60]
+            ys = [1000, 8000, 27000, 64000, 125000, 216000]
+            lf = self.check_linearfit(xs, ys, 4180, 0.87)
+            pf = self.check_polyfit(xs, ys, 3)
+            self.assertGreater(pf, lf)
+
+        def test_linear_model_of_poly_3(self):
+            xs = [1, 2, 3, 4, 5]
+            ys = [1.0, 2.3, 3.74, 5.28, 6.9]
+            lf = self.check_linearfit(xs, ys, 1.47)
+            pf = self.check_polyfit(xs, ys, 1.2)
+            self.assertGreater(pf, lf)
+
+        def test_linear_model_of_poly_offset(self):
+            xs = [10, 20, 30, 40, 50, 60]
+            ys = [1100, 1400, 1900, 2600, 3500, 4600]
+            lf = self.check_linearfit(xs, ys, 70)
+            pf = self.check_polyfit(xs, ys, 2)
+            self.assertGreater(pf, lf)
+
         def test_linear_offset(self):
             self.check_polyfit([1, 2, 3, 4, 5, 6],
                                [1000 + i for i in range(1, 7)], 1)
@@ -491,25 +630,38 @@ def report(args, rng, runs):
         vals = [r[k] for r in runs]
         bounded = [max(v, 1) for v in vals]
         one_fit = False
+        perfect_fit = False
+        fit_r2_thresh = 0.99
+        lin_b, lin_a, lin_r2 = fit_linear_model(rng, bounded)
+        if lin_r2 > fit_r2_thresh:
+            one_fit = True
+        if lin_r2 == 1.0:
+            perfect_fit = True
         p_b, p_a, p_r2 = (1.0, 1.0, 0.0)
         e_b, e_a, e_r2 = (1.0, 1.0, 0.0)
         try:
-            p_b, p_a, p_r2 = fit_polynomial_model(rng, bounded)
-            if p_r2 > 0.9:
-                one_fit = True
+            if not perfect_fit:
+                p_b, p_a, p_r2 = fit_polynomial_model(rng, bounded)
+                if p_r2 > fit_r2_thresh:
+                    one_fit = True
+                if p_r2 == 1.0:
+                    perfect_fit = True
         except ValueError:
             pass
         try:
-            e_b, e_a, e_r2 = fit_exponential_model(rng, bounded)
-            if e_r2 > 0.9:
-                one_fit = True
+            if not perfect_fit:
+                e_b, e_a, e_r2 = fit_exponential_model(rng, bounded)
+                if e_r2 > fit_r2_thresh:
+                    one_fit = True
         except ValueError:
             pass
         if not one_fit:
-            print("failed to fit either polynomial or exponential model to " +
-                  repr(vals))
+            print("failed to fit model to " + repr(vals))
             return True
-        if p_r2 >= e_r2:
+        if lin_r2 >= e_r2 and lin_r2 >= p_r2:
+            # strict-linear is best
+            rows.append((False, 0.0 if lin_b == 0 else 1.0, k, vals))
+        elif p_r2 >= e_r2:
             # polynomial is best
             rows.append((False, p_b, k, vals))
         else:
@@ -598,6 +750,9 @@ def main():
     parser.add_argument(
         '--debug', action='store_true',
         default=False, help='invoke lldb on each scale test')
+    parser.add_argument(
+        '--llvm-stat-reporter', action='store_true',
+        default=False, help='only collect stats via old-style LLVM reporter')
     parser.add_argument(
         '--self-test', action='store_true',
         default=False, help='run arithmetic unit-tests of scale-test itself')