
Commit 90025ca

Merge pull request #12415 from palimondo/empathy-test
2 parents d106d0b + c288f75 commit 90025ca

File tree: 9 files changed (+174, -61 lines)


benchmark/README.md

Lines changed: 38 additions & 4 deletions
@@ -86,22 +86,38 @@ Using the Benchmark Driver
 * `--num-samples`
     * Control the number of samples to take for each test
 * `--list`
-    * Print a list of available tests
+    * Print a list of available tests matching the specified criteria
+* `--tags`
+    * Run tests that are labeled with the specified [tags](https://github.com/apple/swift/blob/master/benchmark/utils/TestsUtils.swift#L19)
+    (comma separated list); multiple tags are interpreted as logical AND, i.e.
+    run only tests that are labeled with all the supplied tags
+* `--skip-tags`
+    * Don't run tests that are labeled with any of the specified tags (comma
+    separated list); default value: `skip,unstable`; to get a complete list of
+    tests, specify an empty `--skip-tags=`
+

 ### Examples

 * `$ ./Benchmark_O --num-iters=1 --num-samples=1`
 * `$ ./Benchmark_Onone --list`
 * `$ ./Benchmark_Osize Ackermann`
+* `$ ./Benchmark_O --tags=Dictionary`
+* `$ ./Benchmark_O --skip-tags=unstable,skip,validation`

 ### Note
 As a shortcut, you can also refer to benchmarks by their ordinal numbers.
-The regular `--list` option does not provide these, but you can run:
-* `$ ./Benchmark_O --list --run-all | tail -n +2 | nl`
-You can use ordinal numbers instead of test names like this:
+These are printed out together with the benchmark names and tags by the
+`--list` parameter. For a complete list of all available performance tests, run
+* `$ ./Benchmark_O --list --skip-tags=`
+
+You can use test numbers instead of test names like this:
 * `$ ./Benchmark_O 1 42`
 * `$ ./Benchmark_Driver run 1 42`

+Test numbers are not stable in the long run; adding and removing tests from the
+benchmark suite will reorder them, but they are stable for a given build.
+
 Using the Harness Generator
 ---------------------------

@@ -186,3 +202,21 @@ public func run_YourTestName(N: Int) {

 The current set of tags are defined by the `BenchmarkCategory` enum in
 `TestsUtils.swift` .
+
+Testing the Benchmark Drivers
+-----------------------------
+When working on tests, after the initial build
+````
+swift-source$ ./swift/utils/build-script -R -B
+````
+you can rebuild just the benchmarks:
+````
+swift-source$ export SWIFT_BUILD_DIR=`pwd`/build/Ninja-ReleaseAssert/swift-macosx-x86_64
+swift-source$ ninja -C ${SWIFT_BUILD_DIR} swift-benchmark-macosx-x86_64
+````
+
+When modifying the testing infrastructure, you should verify that your changes
+pass all the tests:
+````
+swift-source$ ./llvm/utils/lit/lit.py -sv ${SWIFT_BUILD_DIR}/test-macosx-x86_64/benchmark
+````
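
The AND/OR semantics documented above for `--tags` and `--skip-tags` come down
to plain set algebra; the harness implements them with `Set.isSuperset(of:)`
and `Set.isDisjoint(with:)` (see the DriverUtils.swift diff below). The
standalone Swift sketch that follows mirrors that behaviour on made-up
benchmark names and tag sets, purely for illustration:

```swift
// Minimal sketch of the --tags (AND) / --skip-tags (OR) filtering semantics.
// Benchmark names and tag sets below are illustrative, not the real registry.
let benchmarks: [(name: String, tags: Set<String>)] = [
  (name: "TwoSum",    tags: ["validation", "api", "Dictionary", "Array", "algorithm"]),
  (name: "Array2D",   tags: ["validation", "api", "Array"]),
  (name: "Ackermann", tags: ["unstable", "algorithm"]),
]

let tags: Set<String> = ["Dictionary", "Array"]   // --tags: must carry ALL of these
let skipTags: Set<String> = ["unstable", "skip"]  // --skip-tags: must carry NONE of these

let selected = benchmarks.filter { benchmark in
  benchmark.tags.isSuperset(of: tags) &&      // logical AND across --tags
  benchmark.tags.isDisjoint(with: skipTags)   // any matching skip tag excludes the test
}
print(selected.map { $0.name })  // ["TwoSum"]
```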

benchmark/scripts/Benchmark_Driver

Lines changed: 18 additions & 14 deletions
@@ -118,28 +118,32 @@ def instrument_test(driver_path, test, num_samples):
     return avg_test_output


-BENCHMARK_OUTPUT_RE = re.compile('([^,]+),')
-
-
 def get_tests(driver_path, args):
     """Return a list of available performance tests"""
     driver = ([driver_path, '--list'])
+    # Use tab delimiter for easier parsing to override the default comma.
+    # (The third 'column' is always comma-separated list of tags in square
+    # brackets -- currently unused here.)
+    driver.append('--delim=\t')
     if args.benchmarks or args.filters:
-        driver.append('--run-all')
-    tests = []
-    for l in subprocess.check_output(driver).split("\n")[1:]:
-        m = BENCHMARK_OUTPUT_RE.match(l)
-        if m is None:
-            continue
-        tests.append(m.group(1))
+        driver.append('--skip-tags=')  # list all tests, don't skip any tags
+    index_name_pairs = [
+        line.split('\t')[:2] for line in
+        subprocess.check_output(driver).split('\n')[1:-1]
+    ]
+    indices, names = zip(*index_name_pairs)  # unzip list of pairs into 2 lists
     if args.filters:
         regexes = [re.compile(pattern) for pattern in args.filters]
         return sorted(list(set([name for pattern in regexes
-                                for name in tests if pattern.match(name)])))
+                                for name in names if pattern.match(name)])))
     if not args.benchmarks:
-        return tests
-    tests.extend(map(str, range(1, len(tests) + 1)))  # ordinal numbers
-    return sorted(list(set(tests).intersection(set(args.benchmarks))))
+        return names
+    benchmarks = set(args.benchmarks)
+    index_to_name = dict(index_name_pairs)
+    indexed_names = [index_to_name[i]
+                     for i in benchmarks.intersection(set(indices))]
+    return sorted(list(
+        benchmarks.intersection(set(names)).union(indexed_names)))


 def get_current_git_branch(git_repo_path):

benchmark/utils/DriverUtils.swift

Lines changed: 25 additions & 41 deletions
@@ -69,7 +69,7 @@ struct Test {

   /// The benchmark categories that this test belongs to. Used for filtering.
   var tags: [BenchmarkCategory] {
-    return benchInfo.tags
+    return benchInfo.tags.sorted()
   }

   /// An optional initialization function for a benchmark that is run before
@@ -181,7 +181,7 @@ struct TestConfig {

     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --tags=array,set
+    //  --tags=Array,Dictionary
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
@@ -200,7 +200,7 @@ struct TestConfig {

     // We support specifying multiple tags by splitting on comma, i.e.:
     //
-    //  --skip-tags=array,set
+    //  --skip-tags=Array,Set,unstable,skip
     //
     // FIXME: If we used Error instead of .fail, then we could have a cleaner
     // impl here using map on x and tags.formUnion.
@@ -227,39 +227,22 @@
   }

   mutating func findTestsToRun() {
-    let benchmarkNameFilter = Set(filters)
-
-    // t is needed so we don't capture an ivar of a mutable inout self.
-    let t = tags
-    let st = skipTags
-    let filteredTests = Array(registeredBenchmarks.filter { benchInfo in
-      if !t.isSubset(of: benchInfo.tags) {
-        return false
-      }
-
-      if !st.isDisjoint(with: benchInfo.tags) {
-        return false
-      }
-
-      // If the user did not specified a benchmark name filter and our tags are
-      // a subset of the specified tags by the user, return true. We want to run
-      // this test.
-      if benchmarkNameFilter.isEmpty {
-        return true
+    registeredBenchmarks.sort()
+    let indices = Dictionary(uniqueKeysWithValues:
+      zip(registeredBenchmarks.map{$0.name}, 1...))
+    let benchmarkNamesOrIndices = Set(filters)
+    // needed so we don't capture an ivar of a mutable inout self.
+    let (_tags, _skipTags) = (tags, skipTags)
+
+    tests = registeredBenchmarks.filter { benchmark in
+      if benchmarkNamesOrIndices.isEmpty {
+        return benchmark.tags.isSuperset(of: _tags) &&
+          benchmark.tags.isDisjoint(with: _skipTags)
+      } else {
+        return benchmarkNamesOrIndices.contains(benchmark.name) ||
+          benchmarkNamesOrIndices.contains(String(indices[benchmark.name]!))
       }
-
-      // Otherwise, we need to check if our benchInfo's name is in the benchmark
-      // name filter list. If it isn't, then we shouldn't process it.
-      return benchmarkNameFilter.contains(benchInfo.name)
-    }).sorted()
-
-    if (filteredTests.isEmpty) {
-      return
-    }
-
-    tests = filteredTests.enumerated().map {
-      Test(benchInfo: $0.element, index: $0.offset + 1)
-    }
+    }.map { Test(benchInfo: $0, index: indices[$0.name]!) }
   }
 }

@@ -382,14 +365,13 @@ func runBench(_ test: Test, _ c: TestConfig) -> BenchResults? {

   let sampler = SampleRunner()
   for s in 0..<c.numSamples {
+    test.setUpFunction?()
     let time_per_sample: UInt64 = 1_000_000_000 * UInt64(c.iterationScale)

     var scale : UInt
     var elapsed_time : UInt64 = 0
     if c.fixedNumIters == 0 {
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
-      test.tearDownFunction?()

       if elapsed_time > 0 {
         scale = UInt(time_per_sample / elapsed_time)
@@ -402,6 +384,9 @@
     } else {
       // Compute the scaling factor if a fixed c.fixedNumIters is not specified.
       scale = c.fixedNumIters
+      if scale == 1 {
+        elapsed_time = sampler.run(test.name, fn: testFn, num_iters: 1)
+      }
     }
     // Make integer overflow less likely on platforms where Int is 32 bits wide.
     // FIXME: Switch BenchmarkInfo to use Int64 for the iteration scale, or fix
@@ -413,9 +398,7 @@
       if c.verbose {
        print(" Measuring with scale \(scale).")
       }
-      test.setUpFunction?()
       elapsed_time = sampler.run(test.name, fn: testFn, num_iters: scale)
-      test.tearDownFunction?()
     } else {
       scale = 1
     }
@@ -424,6 +407,7 @@
     if c.verbose {
       print(" Sample \(s),\(samples[s])")
     }
+    test.tearDownFunction?()
   }

   let (mean, sd) = internalMeanSD(samples)
@@ -497,9 +481,9 @@ public func main() {
     fatalError("\(msg)")
   case .listTests:
     config.findTestsToRun()
-    print("Enabled Tests\(config.delim)Tags")
+    print("#\(config.delim)Test\(config.delim)[Tags]")
     for t in config.tests {
-      print("\(t.name)\(config.delim)\(t.tags)")
+      print("\(t.index)\(config.delim)\(t.name)\(config.delim)\(t.tags)")
     }
   case .run:
     config.findTestsToRun()
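
For reference, the renumbering above is what makes run-by-number stable within
a given build: `findTestsToRun()` sorts `registeredBenchmarks` and assigns each
test its 1-based position in that order via `Dictionary(uniqueKeysWithValues:)`.
A minimal sketch of the same construction on placeholder names (not the real
registry):

```swift
// Sketch: derive stable 1-based test numbers from sorted benchmark names.
var names = ["TwoSum", "Ackermann", "AngryPhonebook"]
names.sort()  // ["Ackermann", "AngryPhonebook", "TwoSum"]

let indices = Dictionary(uniqueKeysWithValues: zip(names, 1...))
// indices == ["Ackermann": 1, "AngryPhonebook": 2, "TwoSum": 3]

// A filter entry may be either a test name or its number, as in DriverUtils:
let filters: Set<String> = ["2", "TwoSum"]
let selected = names.filter { name in
  filters.contains(name) || filters.contains(String(indices[name]!))
}
print(selected)  // ["AngryPhonebook", "TwoSum"]
```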

benchmark/utils/TestsUtils.swift

Lines changed: 14 additions & 2 deletions
@@ -70,6 +70,18 @@ public enum BenchmarkCategory : String {
   case skip
 }

+extension BenchmarkCategory : CustomStringConvertible {
+  public var description: String {
+    return self.rawValue
+  }
+}
+
+extension BenchmarkCategory : Comparable {
+  public static func < (lhs: BenchmarkCategory, rhs: BenchmarkCategory) -> Bool {
+    return lhs.rawValue < rhs.rawValue
+  }
+}
+
 public struct BenchmarkPlatformSet : OptionSet {
   public let rawValue: Int

@@ -111,7 +123,7 @@ public struct BenchmarkInfo {
   /// A set of category tags that describe this benchmark. This is used by the
   /// harness to allow for easy slicing of the set of benchmarks along tag
   /// boundaries, e.x.: run all string benchmarks or ref count benchmarks, etc.
-  public var tags: [BenchmarkCategory]
+  public var tags: Set<BenchmarkCategory>

   /// The platforms that this benchmark supports. This is an OptionSet.
   private var unsupportedPlatforms: BenchmarkPlatformSet
@@ -146,7 +158,7 @@ public struct BenchmarkInfo {
               unsupportedPlatforms: BenchmarkPlatformSet = []) {
     self.name = name
     self._runFunction = runFunction
-    self.tags = tags
+    self.tags = Set(tags)
     self._setUpFunction = setUpFunction
     self._tearDownFunction = tearDownFunction
     self.unsupportedPlatforms = unsupportedPlatforms
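
The two conformances added above are what keep the new `--list` output compact
and deterministic: `CustomStringConvertible` prints a tag as its raw value
rather than a fully qualified case name (the `LISTTAGS-NOT:
TestsUtils.BenchmarkCategory.` check further down guards exactly this), and
`Comparable` lets the tag set be shown in sorted order. A toy sketch of the
same pattern, using a hypothetical `Category` enum rather than the real
`BenchmarkCategory`:

```swift
// Toy enum mirroring the BenchmarkCategory conformances.
enum Category: String, CustomStringConvertible, Comparable {
  case Dictionary, api, unstable, validation

  var description: String { return rawValue }
  static func < (lhs: Category, rhs: Category) -> Bool {
    return lhs.rawValue < rhs.rawValue
  }
}

let tags: Set<Category> = [.validation, .api, .Dictionary]
// Without the conformances this would print fully qualified case names in
// unspecified order; with them the listing is short and deterministic:
print(tags.sorted())  // [Dictionary, api, validation]
```
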
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// Integration tests between Benchmark_Driver and Benchmark_O
+// TODO: Keep the "run just once" check and move the rest into unit tests for
+// Benchmark_Driver, as they are redundant and unnecessarily slow.
+
+// RUN: %Benchmark_Driver run Ackermann | %FileCheck %s --check-prefix RUNNAMED
+// RUNNAMED: Ackermann
+
+// RUN: %Benchmark_Driver run 1 | %FileCheck %s --check-prefix RUNBYNUMBER
+// RUNBYNUMBER: Ackermann
+
+// RUN: %Benchmark_Driver run 1 Ackermann 1 \
+// RUN:   | %FileCheck %s --check-prefix RUNJUSTONCE
+// RUNJUSTONCE-LABEL: Ackermann
+// RUNJUSTONCE-NOT: Ackermann
+
+// RUN: %Benchmark_Driver run -f Acker | %FileCheck %s --check-prefix RUNFILTER
+// RUNFILTER: Ackermann

test/benchmark/Benchmark_O.test-sh

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+// REQUIRES: OS=macosx
+// REQUIRES: asserts
+// REQUIRES: benchmark
+// REQUIRES: CMAKE_GENERATOR=Ninja
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTTAGS
+// LISTTAGS: AngryPhonebook,[
+// LISTTAGS-NOT: TestsUtils.BenchmarkCategory.
+// LISTTAGS-SAME: String,
+// LISTTAGS-SAME: ]
+
+// RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
+// RUN:   | %FileCheck %s --check-prefix NUMITERS1
+// NUMITERS1: AngryPhonebook,1
+// NUMITERS1-NOT: 0,0,0,0,0
+
+// Should run benchmark by name, even if its tags match the default skip-tags
+// (unstable,skip). Ackermann is marked unstable
+// RUN: %Benchmark_O Ackermann | %FileCheck %s --check-prefix NAMEDSKIP
+// NAMEDSKIP: Ackermann
+
+// RUN: %Benchmark_O --list --tags=Dictionary,Array \
+// RUN:   | %FileCheck %s --check-prefix ANDTAGS
+// ANDTAGS: TwoSum
+// ANDTAGS-NOT: Array2D
+// ANDTAGS-NOT: DictionarySwap
+
+// RUN: %Benchmark_O --list --tags=algorithm --skip-tags=validation \
+// RUN:   | %FileCheck %s --check-prefix TAGSANDSKIPTAGS
+// TAGSANDSKIPTAGS: Ackermann
+// TAGSANDSKIPTAGS: DictOfArraysToArrayOfDicts
+// TAGSANDSKIPTAGS: Fibonacci
+// TAGSANDSKIPTAGS: RomanNumbers
+
+// RUN: %Benchmark_O --list --tags=algorithm \
+// RUN:   --skip-tags=validation,Dictionary,String \
+// RUN:   | %FileCheck %s --check-prefix ORSKIPTAGS
+// ORSKIPTAGS: Ackermann
+// ORSKIPTAGS-NOT: DictOfArraysToArrayOfDicts
+// ORSKIPTAGS: Fibonacci
+// ORSKIPTAGS-NOT: RomanNumbers
+
+// RUN: %Benchmark_O --list | %FileCheck %s --check-prefix LISTPRECOMMIT
+// LISTPRECOMMIT: #,Test,[Tags]
+// LISTPRECOMMIT-NOT: Ackermann
+// LISTPRECOMMIT: {{[0-9]+}},AngryPhonebook
+
+// RUN: %Benchmark_O --list --skip-tags= | %FileCheck %s --check-prefix LISTALL
+// LISTALL: Ackermann
+// LISTALL: AngryPhonebook

test/lit.cfg

Lines changed: 4 additions & 0 deletions
@@ -272,6 +272,8 @@ config.complete_test = inferSwiftBinary('complete-test')
 config.swift_api_digester = inferSwiftBinary('swift-api-digester')
 config.swift_refactor = inferSwiftBinary('swift-refactor')
 config.swift_demangle_yamldump = inferSwiftBinary('swift-demangle-yamldump')
+config.benchmark_o = inferSwiftBinary('Benchmark_O')
+config.benchmark_driver = inferSwiftBinary('Benchmark_Driver')

 config.swift_utils = make_path(config.swift_src_root, 'utils')
 config.line_directive = make_path(config.swift_utils, 'line-directive')
@@ -366,6 +368,8 @@ config.substitutions.append( ('%swift-llvm-opt', config.swift_llvm_opt) )
 config.substitutions.append( ('%llvm-dwarfdump', config.llvm_dwarfdump) )
 config.substitutions.append( ('%llvm-dis', config.llvm_dis) )
 config.substitutions.append( ('%swift-demangle-yamldump', config.swift_demangle_yamldump) )
+config.substitutions.append( ('%Benchmark_O', config.benchmark_o) )
+config.substitutions.append( ('%Benchmark_Driver', config.benchmark_driver) )

 # This must come after all substitutions containing "%swift".
 config.substitutions.append(

test/lit.site.cfg.in

Lines changed: 3 additions & 0 deletions
@@ -88,6 +88,9 @@ config.available_features.add("CMAKE_GENERATOR=@CMAKE_GENERATOR@")
 if "@SWIFT_ENABLE_SOURCEKIT_TESTS@" == "TRUE":
     config.available_features.add('sourcekit')

+if "@SWIFT_BUILD_PERF_TESTSUITE@" == "TRUE":
+    config.available_features.add('benchmark')
+
 if "@SWIFT_ENABLE_GUARANTEED_NORMAL_ARGUMENTS@" == "TRUE":
     config.available_features.add('plus_zero_runtime')
 else:
