Skip to content

Commit 03fe8d6

Browse files
committed
[benchmark] Add cross-engine benchmark helpers
1 parent 5fd8840 commit 03fe8d6

File tree

8 files changed

+126
-183
lines changed

8 files changed

+126
-183
lines changed

Sources/RegexBenchmark/Benchmark.swift

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ public protocol RegexBenchmark {
99
public struct Benchmark: RegexBenchmark {
1010
public let name: String
1111
let regex: Regex<Substring>
12-
let ty: MatchType
12+
let type: MatchType
1313
let target: String
1414

1515
public enum MatchType {
@@ -19,7 +19,7 @@ public struct Benchmark: RegexBenchmark {
1919
}
2020

2121
public func run() {
22-
switch ty {
22+
switch type {
2323
case .whole: blackHole(target.wholeMatch(of: regex))
2424
case .allMatches: blackHole(target.matches(of: regex))
2525
case .first: blackHole(target.firstMatch(of: regex))
@@ -30,21 +30,21 @@ public struct Benchmark: RegexBenchmark {
3030
public struct NSBenchmark: RegexBenchmark {
3131
public let name: String
3232
let regex: NSRegularExpression
33-
let ty: NSMatchType
33+
let type: NSMatchType
3434
let target: String
3535

3636
var range: NSRange {
3737
NSRange(target.startIndex..<target.endIndex, in: target)
3838
}
3939

4040
public enum NSMatchType {
41-
case all
41+
case allMatches
4242
case first
4343
}
4444

4545
public func run() {
46-
switch ty {
47-
case .all: blackHole(regex.matches(in: target, range: range))
46+
switch type {
47+
case .allMatches: blackHole(regex.matches(in: target, range: range))
4848
case .first: blackHole(regex.firstMatch(in: target, range: range))
4949
}
5050
}
@@ -110,6 +110,80 @@ public struct BenchmarkRunner {
110110
}
111111
}
112112

113+
/// A benchmark meant to be run across multiple engines
114+
struct CrossBenchmark {
115+
/// The base name of the benchmark
116+
var baseName: String
117+
118+
/// The string to compile in different engines
119+
var regex: String
120+
121+
/// The text to search
122+
var input: String
123+
124+
// TODO: var output, for validation
125+
126+
/// Whether this is whole string matching or a searching benchmark
127+
///
128+
/// TODO: Probably better to have a whole-line vs search anywhere, maybe
129+
/// accommodate multi-line matching, etc.
130+
var isWhole: Bool = false
131+
132+
func register(_ runner: inout BenchmarkRunner) {
133+
let swiftRegex = try! Regex(regex, as: Substring.self)
134+
135+
let nsPattern = isWhole ? "^" + regex + "$" : regex
136+
let nsRegex: NSRegularExpression
137+
if isWhole {
138+
nsRegex = try! NSRegularExpression(pattern: "^" + regex + "$")
139+
} else {
140+
nsRegex = try! NSRegularExpression(pattern: regex)
141+
}
142+
143+
if isWhole {
144+
runner.register(
145+
Benchmark(
146+
name: baseName + "Whole",
147+
regex: swiftRegex,
148+
type: .whole,
149+
target: input))
150+
runner.register(
151+
NSBenchmark(
152+
name: baseName + "Whole_NS",
153+
regex: nsRegex,
154+
type: .first,
155+
target: input))
156+
} else {
157+
runner.register(
158+
Benchmark(
159+
name: baseName + "First",
160+
regex: swiftRegex,
161+
type: .first,
162+
target: input))
163+
runner.register(
164+
Benchmark(
165+
name: baseName + "All",
166+
regex: swiftRegex,
167+
type: .allMatches,
168+
target: input))
169+
runner.register(
170+
NSBenchmark(
171+
name: baseName + "First_NS",
172+
regex: nsRegex,
173+
type: .first,
174+
target: input))
175+
runner.register(
176+
NSBenchmark(
177+
name: baseName + "All_NS",
178+
regex: nsRegex,
179+
type: .allMatches,
180+
target: input))
181+
}
182+
}
183+
}
184+
185+
// TODO: Capture-containing benchmarks
186+
113187
// nom nom nom, consume the argument
114188
@inline(never)
115189
public func blackHole<T>(_ x: T) {

Sources/RegexBenchmark/CLI.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@ struct Runner: ParsableCommand {
1414
func makeRunner() -> BenchmarkRunner {
1515
var benchmark = BenchmarkRunner("RegexBench", samples)
1616
benchmark.addReluctantQuant()
17-
benchmark.addBacktracking()
1817
benchmark.addCSS()
19-
benchmark.addFirstMatch()
18+
benchmark.addNotFound()
2019
benchmark.addGraphemeBreak()
2120
return benchmark
2221
}

Sources/RegexBenchmark/Suite/Backtracking.swift

Lines changed: 0 additions & 45 deletions
This file was deleted.

Sources/RegexBenchmark/Suite/CssRegex.swift

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,11 @@ import _StringProcessing
44
extension BenchmarkRunner {
55
mutating func addCSS() {
66
let r = #"--([a-zA-Z0-9_-]+)\s*:\s*(.*?):"#
7-
8-
let cssRegex = Benchmark(
9-
name: "cssRegex",
10-
regex: try! Regex(r),
11-
ty: .allMatches,
12-
target: Inputs.swiftOrgCSS
13-
)
147

15-
let cssRegexNS = NSBenchmark(
16-
name: "cssRegexNS",
17-
regex: try! NSRegularExpression(pattern: r),
18-
ty: .all,
19-
target: Inputs.swiftOrgCSS
20-
)
21-
register(cssRegex)
22-
register(cssRegexNS)
8+
// FIXME: Why do `first` and `all` have the same running time?
9+
10+
let css = CrossBenchmark(
11+
baseName: "css", regex: r, input: Inputs.swiftOrgCSS)
12+
css.register(&self)
2313
}
2414
}

Sources/RegexBenchmark/Suite/FirstMatch.swift

Lines changed: 0 additions & 49 deletions
This file was deleted.

Sources/RegexBenchmark/Suite/GraphemeBreak.swift

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,10 @@ extension BenchmarkRunner {
77
mutating func addGraphemeBreak() {
88
let input = Inputs.graphemeBreakData
99
let regex = #"(?:[0-9A-F]+)(?:\.\.(?:[0-9A-F]+))?\s+;\s+(?:\w+).*"#
10-
let type = Substring.self // (Substring, Substring, Substring?, Substring).self
1110

12-
let graphemeBreakFirst = Benchmark(
13-
name: "GraphemeBreakNoCapFirst",
14-
regex: try! Regex(regex, as: type),
15-
ty: .first,
16-
target: input
17-
)
18-
19-
let graphemeBreakAll = Benchmark(
20-
name: "GraphemeBreakNoCapAll",
21-
regex: try! Regex(regex, as: type),
22-
ty: .allMatches,
23-
target: input
24-
)
25-
26-
let graphemeBreakFirstNS = NSBenchmark(
27-
name: "GraphemeBreakNoCapFirstNS",
28-
regex: try! NSRegularExpression(pattern: regex),
29-
ty: .first,
30-
target: input
31-
)
32-
33-
let graphemeBreakAllNS = NSBenchmark(
34-
name: "GraphemeBreakNoCapAllNS",
35-
regex: try! NSRegularExpression(pattern: regex),
36-
ty: .all,
37-
target: input
38-
)
39-
40-
register(graphemeBreakFirst)
41-
register(graphemeBreakAll)
42-
register(graphemeBreakFirstNS)
43-
register(graphemeBreakAllNS)
11+
let benchmark = CrossBenchmark(
12+
baseName: "GraphemeBreakNoCap", regex: regex, input: input)
13+
benchmark.register(&self)
4414
}
4515
}
4616

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import _StringProcessing
2+
import Foundation
3+
4+
extension BenchmarkRunner {
5+
mutating func addNotFound() {
6+
let input = String(repeating: " ", count: 100_000)
7+
8+
let notFound = CrossBenchmark(
9+
baseName: "notFound", regex: "a", input: input)
10+
notFound.register(&self)
11+
12+
let anchoredNotFound = CrossBenchmark(
13+
baseName: "notFound", regex: "^ +a", input: input)
14+
anchoredNotFound.register(&self)
15+
}
16+
}

Sources/RegexBenchmark/Suite/ReluctantQuant.swift

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,28 @@ import RegexBuilder
33

44
extension BenchmarkRunner {
55
mutating func addReluctantQuant() {
6-
let size = 500000
7-
let s = String(repeating: "a", count: size)
8-
9-
let reluctantQuant = Benchmark(
10-
name: "ReluctantQuant",
11-
regex: Regex {
12-
OneOrMore(.any, .reluctant)
13-
},
14-
ty: .whole,
15-
target: s
16-
)
6+
let size = 100_000
7+
let input = String(repeating: "a", count: size)
178

18-
let eagarQuantWithTerminal = Benchmark(
19-
name: "EagarQuantWithTerminal",
20-
regex: Regex {
21-
OneOrMore(.any, .eager)
22-
";"
23-
},
24-
ty: .whole,
25-
target: s + ";"
26-
)
9+
let reluctantQuant = CrossBenchmark(
10+
baseName: "ReluctantQuant",
11+
regex: #".*?"#,
12+
input: input,
13+
isWhole: true)
14+
reluctantQuant.register(&self)
2715

28-
let reluctantQuantWithTerminal = Benchmark(
29-
name: "ReluctantQuantWithTerminal",
30-
regex: Regex {
31-
OneOrMore(.any, .reluctant)
32-
";"
33-
},
34-
ty: .whole,
35-
target: s + ";"
36-
)
37-
38-
register(reluctantQuant)
39-
register(reluctantQuantWithTerminal)
40-
register(eagarQuantWithTerminal)
16+
let eagarQuantWithTerminal = CrossBenchmark(
17+
baseName: "EagarQuantWithTerminal",
18+
regex: #".*;"#,
19+
input: input + ";",
20+
isWhole: true)
21+
eagarQuantWithTerminal.register(&self)
22+
23+
let reluctantQuantWithTerminal = CrossBenchmark(
24+
baseName: "ReluctantQuantWithTerminal",
25+
regex: #".*?;"#,
26+
input: input + ";",
27+
isWhole: true)
28+
reluctantQuantWithTerminal.register(&self)
4129
}
4230
}

0 commit comments

Comments
 (0)