swiftlang · swift-ci · Sep 14, 2018 · Aug 29, 2018 · Aug 29, 2018 · Aug 29, 2018
diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
@@ -434,7 +434,7 @@ class BenchmarkDoctor(object):
         measurements = dict(
             [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix),
               self.driver.run(benchmark, num_samples=s, num_iters=i,
-                              verbose=True))
+                              verbose=True, measure_memory=True))
              for o in opts
              for s, i in run_args
              for suffix in list('abcde')

diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
@@ -34,6 +34,7 @@ class `ReportFormatter` creates the test comparison report in specified format.
 import sys
 from bisect import bisect, bisect_left, bisect_right
 from collections import namedtuple
+from decimal import Decimal, ROUND_HALF_EVEN
 from math import sqrt
 
 
@@ -141,20 +142,32 @@ def max(self):
         """Maximum sampled value."""
         return self.samples[-1].runtime
 
+    def quantile(self, q):
+        """Return runtime of the sample nearest to quantile `q` (0.5 = median).
+
+        The index is `(count - 1) * q`, rounded half-to-even to match the
+        behavior of numpy's quantile(interpolation='nearest') and quantile
+        estimate type R-3, SAS-2. See:
+        https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
+        """
+        index = int(Decimal((self.count - 1) * Decimal(q))
+                    .quantize(0, ROUND_HALF_EVEN))
+        return self.samples[index].runtime
+
     @property
     def median(self):
         """Median sampled value."""
-        return self.samples[self.count / 2].runtime
+        return self.quantile(0.5)
 
     @property
     def q1(self):
         """First Quartile (25th Percentile)."""
-        return self.samples[self.count / 4].runtime
+        return self.quantile(0.25)
 
     @property
     def q3(self):
         """Third Quartile (75th Percentile)."""
-        return self.samples[(self.count / 2) + (self.count / 4)].runtime
+        return self.quantile(0.75)
 
     @property
     def iqr(self):

diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py
@@ -476,10 +476,10 @@ def test_measure_10_independent_1s_benchmark_series(self):
             # 5x i1 series, with 300 μs runtime its possible to take 4098
             # samples/s, but it should be capped at 2k
             ([(_run('B1', num_samples=2048, num_iters=1,
-                    verbose=True), _PTR(min=300))] * 5) +
+                    verbose=True, measure_memory=True), _PTR(min=300))] * 5) +
             # 5x i2 series
             ([(_run('B1', num_samples=2048, num_iters=2,
-                    verbose=True), _PTR(min=300))] * 5)
+                    verbose=True, measure_memory=True), _PTR(min=300))] * 5)
         ))
         doctor = BenchmarkDoctor(self.args, driver)
         with captured_output() as (out, _):

diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
@@ -75,13 +75,13 @@ def test_computes_five_number_summary(self):
             self.samples, (1000, 1000, 1000, 1000, 1000))
         self.samples.add(Sample(2, 1, 1100))
         self.assertEqualFiveNumberSummary(
-            self.samples, (1000, 1000, 1100, 1100, 1100))
+            self.samples, (1000, 1000, 1000, 1100, 1100))
         self.samples.add(Sample(3, 1, 1050))
         self.assertEqualFiveNumberSummary(
-            self.samples, (1000, 1000, 1050, 1050, 1100))
+            self.samples, (1000, 1000, 1050, 1100, 1100))
         self.samples.add(Sample(4, 1, 1025))
         self.assertEqualFiveNumberSummary(
-            self.samples, (1000, 1025, 1050, 1100, 1100))
+            self.samples, (1000, 1025, 1050, 1050, 1100))
         self.samples.add(Sample(5, 1, 1075))
         self.assertEqualFiveNumberSummary(
             self.samples, (1000, 1025, 1050, 1075, 1100))
@@ -156,11 +156,12 @@ def test_excludes_outliers_zero_IQR(self):
         self.samples.add(Sample(0, 2, 23))
         self.samples.add(Sample(1, 2, 18))
         self.samples.add(Sample(2, 2, 18))
+        self.samples.add(Sample(3, 2, 18))
         self.assertEquals(self.samples.iqr, 0)
 
         self.samples.exclude_outliers()
 
-        self.assertEquals(self.samples.count, 2)
+        self.assertEquals(self.samples.count, 3)
         self.assertEqualStats(
             (self.samples.min, self.samples.max), (18, 18))
 
@@ -368,7 +369,6 @@ def test_parse_results_verbose(self):
     Sample 0,11812
     Measuring with scale 90.
     Sample 1,13898
-    Measuring with scale 91.
     Sample 2,11467
 1,AngryPhonebook,3,11467,13898,12392,1315,11812
 Running Array2D for 3 samples.
@@ -388,7 +388,7 @@ def test_parse_results_verbose(self):
         )
         self.assertEquals(r.num_samples, r.samples.num_samples)
         self.assertEquals(results[0].samples.all_samples,
-                          [(0, 78, 11812), (1, 90, 13898), (2, 91, 11467)])
+                          [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)])
 
         r = results[1]
         self.assertEquals(