[benchmark] Fix quantile estimation type

palimondo · palimondo · commit a9f0ce43389f · 2018-09-20T09:19:07.000+02:00
The correct quantile estimation type for printing all measurements in the summary report when `quantile == num-samples - 1` is R-1, SAS-3. It is the inverse of the empirical distribution function. References: * https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample * discussion in #19097 (comment)
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
@@ -34,8 +34,7 @@ class `ReportFormatter` creates the test comparison report in specified format.
 import sys
 from bisect import bisect, bisect_left, bisect_right
 from collections import namedtuple
-from decimal import Decimal, ROUND_HALF_EVEN
-from math import sqrt
+from math import ceil, sqrt
 
 
 class Sample(namedtuple('Sample', 'i num_iters runtime')):
@@ -143,15 +142,12 @@ def max(self):
         return self.samples[-1].runtime
 
     def quantile(self, q):
-        """Return runtime of a sample nearest to the quantile.
+        """Return runtime for given quantile.
 
-        Explicitly uses round-half-to-even rounding algorithm to match the
-        behavior of numpy's quantile(interpolation='nearest') and quantile
-        estimate type R-3, SAS-2. See:
+        Equivalent to quantile estimate type R-1, SAS-3. See:
         https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
         """
-        index = int(Decimal((self.count - 1) * Decimal(q))
-                    .quantize(0, ROUND_HALF_EVEN))
+        index = max(0, int(ceil(self.count * float(q))) - 1)
         return self.samples[index].runtime
 
     @property
diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
@@ -62,6 +62,17 @@ def test_stores_samples(self):
         self.assertEquals(s.num_iters, 42)
         self.assertEquals(s.runtime, 1000)
 
+    def test_quantile(self):
+        self.assertEquals(self.samples.quantile(1), 1000)
+        self.assertEquals(self.samples.quantile(0), 1000)
+        self.samples.add(Sample(2, 1, 1100))
+        self.assertEquals(self.samples.quantile(0), 1000)
+        self.assertEquals(self.samples.quantile(1), 1100)
+        self.samples.add(Sample(3, 1, 1050))
+        self.assertEquals(self.samples.quantile(0), 1000)
+        self.assertEquals(self.samples.quantile(.5), 1050)
+        self.assertEquals(self.samples.quantile(1), 1100)
+
     def assertEqualFiveNumberSummary(self, ss, expected_fns):
         e_min, e_q1, e_median, e_q3, e_max = expected_fns
         self.assertEquals(ss.min, e_min)
@@ -81,7 +92,7 @@ def test_computes_five_number_summary(self):
             self.samples, (1000, 1000, 1050, 1100, 1100))
         self.samples.add(Sample(4, 1, 1025))
         self.assertEqualFiveNumberSummary(
-            self.samples, (1000, 1025, 1050, 1050, 1100))
+            self.samples, (1000, 1000, 1025, 1050, 1100))
         self.samples.add(Sample(5, 1, 1075))
         self.assertEqualFiveNumberSummary(
             self.samples, (1000, 1025, 1050, 1075, 1100))
@@ -447,7 +458,7 @@ def test_results_from_merge_verbose(self):
         self.assertTrue(isinstance(result, PerformanceTestResult))
         self.assertEquals(result.min, 350815)
         self.assertEquals(result.max, 376131)
-        self.assertEquals(result.median, 363094)
+        self.assertEquals(result.median, 358817)
         self.assertAlmostEquals(result.sd, 8443.37, places=2)
         self.assertAlmostEquals(result.mean, 361463.25, places=2)
         self.assertEquals(result.num_samples, 8)
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
@@ -31,15 +31,13 @@ struct BenchResults {
     self.stats = self.samples.reduce(into: Stats(), Stats.collect)
   }
 
-  /// Return sample at index nearest to the `quantile`.
+  /// Return measured value for given `quantile`.
   ///
-  /// Explicitly uses round-half-to-even rounding algorithm to match the
-  /// behavior of numpy's quantile(interpolation='nearest') and quantile
-  /// estimate type R-3, SAS-2. See:
+  /// Equivalent to quantile estimate type R-1, SAS-3. See:
   /// https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
   subscript(_ quantile: Double) -> T {
-    let index = Int(
-      (Double(samples.count - 1) * quantile).rounded(.toNearestOrEven))
+    let index = Swift.max(0,
+      Int((Double(samples.count) * quantile).rounded(.up)) - 1)
     return samples[index]
   }