Merge pull request swiftlang#39340 from lorentey/fix-benchmarks

lorentey · web-flow · commit 0b656b6aa94c · 2021-09-17T00:44:18.000-07:00
[benchmark] Fix spurious benchmark regressions
diff --git a/benchmark/README.md b/benchmark/README.md
@@ -199,7 +199,30 @@ benchmarks will be compiled with -Onone!**
 * `$ ./Benchmark_O --tags=Dictionary`
 * `$ ./Benchmark_O --skip-tags=unstable,skip,validation`
 
-### Note
+### Deterministic Hashing Requirement
+
+To run benchmarks, you'll need to disable randomized hash seeding by setting the
+`SWIFT_DETERMINISTIC_HASHING` environment variable to `1`. (You only need to do
+this when running the benchmark executables directly -- the driver script does
+this for you automatically.)
+
+* `$ env SWIFT_DETERMINISTIC_HASHING=1 ./Benchmark_O --num-iters=1 --num-samples=1`
+
+This makes for more stable results, by preventing random hash collision changes
+from affecting benchmark measurements. Benchmark measurements start by checking
+that deterministic hashing is enabled and they fail with a runtime trap when it
+isn't.
+
+If for some reason you want to run the benchmarks using standard randomized
+hashing, you can disable this check by passing the
+`--allow-nondeterministic-hashing` option to the executable.
+
+* `$ ./Benchmark_O --num-iters=1 --num-samples=1 --allow-nondeterministic-hashing`
+
+This will affect the reliability of measurements, so this is not recommended.
+
+### Benchmarking by Numbers
+
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
 These are printed out together with benchmark names and tags using the
 `--list` parameter. For a complete list of all available performance tests run
diff --git a/benchmark/cmake/modules/AddSwiftBenchmarkSuite.cmake b/benchmark/cmake/modules/AddSwiftBenchmarkSuite.cmake
@@ -683,7 +683,7 @@ function (swift_benchmark_compile_archopts)
           ${objcfile}
           "-o" "${OUTPUT_EXEC}"
         COMMAND
-        "codesign" "-f" "-s" "-" "${OUTPUT_EXEC}")
+        "${srcdir}/../utils/swift-darwin-postprocess.py" "${OUTPUT_EXEC}")
   else()
     # TODO: rpath.
     add_custom_command(
diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
@@ -56,6 +56,11 @@ class BenchmarkDriver(object):
         """
         self.args = args
         self.run_env = os.environ.copy()
+
+        # Set a constant hash seed. Some tests are highly sensitive to
+        # fluctuations in the number of hash collisions.
+        self.run_env["SWIFT_DETERMINISTIC_HASHING"] = "1"
+
         if hasattr(args, 'libdir') and args.libdir:
             # The benchmark binaries should pick up the built swift libraries
             # automatically, because their RPATH should point to ../lib/swift
@@ -65,15 +70,13 @@ class BenchmarkDriver(object):
                 self.run_env["DYLD_LIBRARY_PATH"] = args.libdir
             elif platform.system() == "Linux":
                 self.run_env["LD_LIBRARY_PATH"] = args.libdir
+
         self._subprocess = _subprocess or subprocess
         self.all_tests = []
         self.test_number = {}
         self.tests = tests or self._get_tests()
         self.parser = parser or LogParser()
         self.results = {}
-        # Set a constant hash seed. Some tests are currently sensitive to
-        # fluctuations in the number of hash collisions.
-        os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"
 
     def _invoke(self, cmd):
         return self._subprocess.check_output(
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
@@ -104,6 +104,9 @@ struct TestConfig {
   // Should we log the measurement metadata?
   let logMeta: Bool
 
+  // Allow running with nondeterministic hashing?
+  var allowNondeterministicHashing: Bool
+
   /// After we run the tests, should the harness sleep to allow for utilities
   /// like leaks that require a PID to run on the test harness.
   let afterRunSleep: UInt32?
@@ -128,6 +131,7 @@ struct TestConfig {
       var verbose: Bool?
       var logMemory: Bool?
       var logMeta: Bool?
+      var allowNondeterministicHashing: Bool?
       var action: TestAction?
       var tests: [String]?
     }
@@ -191,6 +195,10 @@ struct TestConfig {
     p.addArgument("--list", \.action, defaultValue: .listTests,
                   help: "don't run the tests, just log the list of test \n" +
                         "numbers, names and tags (respects specified filters)")
+    p.addArgument("--allow-nondeterministic-hashing",
+                  \.allowNondeterministicHashing, defaultValue: true,
+                  help: "Don't trap when running without the \n" +
+                        "SWIFT_DETERMINISTIC_HASHING=1 environment variable")
     p.addArgument(nil, \.tests) // positional arguments
 
     let c = p.parse()
@@ -208,6 +216,7 @@ struct TestConfig {
     logMeta = c.logMeta ?? false
     afterRunSleep = c.afterRunSleep
     action = c.action ?? .run
+    allowNondeterministicHashing = c.allowNondeterministicHashing ?? false
     tests = TestConfig.filterTests(registeredBenchmarks,
                                     tests: c.tests ?? [],
                                     tags: c.tags ?? [],
@@ -671,6 +680,17 @@ final class TestRunner {
   }
 }
 
+extension Hasher {
+  static var isDeterministic: Bool {
+    // This is a quick test for deterministic hashing.
+    // When hashing uses a random seed, each `Set` value
+    // contains its members in some unique, random order.
+    let set1 = Set(0 ..< 100)
+    let set2 = Set(0 ..< 100)
+    return set1.elementsEqual(set2)
+  }
+}
+
 public func main() {
   let config = TestConfig(registeredBenchmarks)
   switch (config.action) {
@@ -682,6 +702,18 @@ public func main() {
       print(testDescription)
     }
   case .run:
+    if !config.allowNondeterministicHashing && !Hasher.isDeterministic {
+      fatalError("""
+        Benchmark runs require deterministic hashing to be enabled.
+
+        This prevents spurious regressions in hashed collection performance.
+        You can do this by setting the SWIFT_DETERMINISTIC_HASHING environment
+        variable to 1.
+
+        If you know what you're doing, you can disable this check by passing
+        the option '--allow-nondeterministic-hashing to the benchmarking executable.
+        """)
+    }
     TestRunner(config).runBenchmarks()
     if let x = config.afterRunSleep {
       sleep(x)
diff --git a/utils/swift_build_support/swift_build_support/products/benchmarks.py b/utils/swift_build_support/swift_build_support/products/benchmarks.py
@@ -52,14 +52,16 @@ def should_test(self, host_target):
         return self.args.test_toolchainbenchmarks
 
     def _get_test_environment(self, host_target):
+        env = {
+            "SWIFT_DETERMINISTIC_HASHING": "1"
+        }
         if platform.system() == 'Darwin':
             # the resulting binaries would search first in /usr/lib/swift,
             # we need to prefer the libraries we just built
-            return {'DYLD_LIBRARY_PATH': os.path.join(
+            env['DYLD_LIBRARY_PATH'] = os.path.join(
                 _get_toolchain_path(host_target, self, self.args),
-                'usr', 'lib', 'swift', 'macosx')}
-
-        return None
+                'usr', 'lib', 'swift', 'macosx')
+        return env
 
     def test(self, host_target):
         """Just run a single instance of the command for both .debug and