Skip to content

Commit ca2a52b

Browse files
committed
[benchmark] Setup CharacterPropertiesPrecomputed
Reduced the time to run the setUpFunction from 2.2s to 380ms on my ancient computer… This should fit well under 200ms on more modern machines.
1 parent bfbff45 commit ca2a52b

File tree

2 files changed

+32
-103
lines changed

2 files changed

+32
-103
lines changed

benchmark/single-source/CharacterProperties.swift

Lines changed: 20 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -253,133 +253,57 @@ func setupMemo() {
253253
}
254254

255255
// Precompute whole scalar set
256-
var controlCharactersPrecomputed: Set<UInt32> = {
256+
func precompute(_ charSet: CharacterSet) -> Set<UInt32> {
257257
var result = Set<UInt32>()
258-
for i in 0...0x0010_FFFF {
259-
guard let scalar = UnicodeScalar(i) else { continue }
260-
if controlCharacters.contains(scalar) {
261-
result.insert(scalar.value)
258+
for plane in 0...0x10 {
259+
guard charSet.hasMember(inPlane: UInt8(plane)) else { continue }
260+
let offset = plane &* 0x1_0000
261+
for codePoint in 0...0xFFFF {
262+
guard let scalar = UnicodeScalar(codePoint &+ offset) else { continue }
263+
if charSet.contains(scalar) {
264+
result.insert(scalar.value)
265+
}
262266
}
263267
}
264268
return result
265-
}()
269+
}
270+
var controlCharactersPrecomputed: Set<UInt32> = precompute(controlCharacters)
266271
func isControlPrecomputed(_ c: Character) -> Bool {
267272
return controlCharactersPrecomputed.contains(c.firstScalar.value)
268273
}
269-
var alphanumericsPrecomputed: Set<UInt32> = {
270-
var result = Set<UInt32>()
271-
for i in 0...0x0010_FFFF {
272-
guard let scalar = UnicodeScalar(i) else { continue }
273-
if alphanumerics.contains(scalar) {
274-
result.insert(scalar.value)
275-
}
276-
}
277-
return result
278-
}()
274+
var alphanumericsPrecomputed: Set<UInt32> = precompute(alphanumerics)
279275
func isAlphanumericPrecomputed(_ c: Character) -> Bool {
280276
return alphanumericsPrecomputed.contains(c.firstScalar.value)
281277
}
282-
var lowercaseLettersPrecomputed: Set<UInt32> = {
283-
var result = Set<UInt32>()
284-
for i in 0...0x0010_FFFF {
285-
guard let scalar = UnicodeScalar(i) else { continue }
286-
if lowercaseLetters.contains(scalar) {
287-
result.insert(scalar.value)
288-
}
289-
}
290-
return result
291-
}()
278+
var lowercaseLettersPrecomputed: Set<UInt32> = precompute(lowercaseLetters)
292279
func isLowercasePrecomputed(_ c: Character) -> Bool {
293280
return lowercaseLettersPrecomputed.contains(c.firstScalar.value)
294281
}
295-
var punctuationCharactersPrecomputed: Set<UInt32> = {
296-
var result = Set<UInt32>()
297-
for i in 0...0x0010_FFFF {
298-
guard let scalar = UnicodeScalar(i) else { continue }
299-
if punctuationCharacters.contains(scalar) {
300-
result.insert(scalar.value)
301-
}
302-
}
303-
return result
304-
}()
282+
var punctuationCharactersPrecomputed: Set<UInt32> = precompute(punctuationCharacters)
305283
func isPunctuationPrecomputed(_ c: Character) -> Bool {
306284
return punctuationCharactersPrecomputed.contains(c.firstScalar.value)
307285
}
308-
var whitespacesPrecomputed: Set<UInt32> = {
309-
var result = Set<UInt32>()
310-
for i in 0...0x0010_FFFF {
311-
guard let scalar = UnicodeScalar(i) else { continue }
312-
if whitespaces.contains(scalar) {
313-
result.insert(scalar.value)
314-
}
315-
}
316-
return result
317-
}()
286+
var whitespacesPrecomputed: Set<UInt32> = precompute(whitespaces)
318287
func isWhitespacePrecomputed(_ c: Character) -> Bool {
319288
return whitespacesPrecomputed.contains(c.firstScalar.value)
320289
}
321-
var lettersPrecomputed: Set<UInt32> = {
322-
var result = Set<UInt32>()
323-
for i in 0...0x0010_FFFF {
324-
guard let scalar = UnicodeScalar(i) else { continue }
325-
if letters.contains(scalar) {
326-
result.insert(scalar.value)
327-
}
328-
}
329-
return result
330-
}()
290+
var lettersPrecomputed: Set<UInt32> = precompute(letters)
331291
func isLetterPrecomputed(_ c: Character) -> Bool {
332292
return lettersPrecomputed.contains(c.firstScalar.value)
333293
}
334-
var uppercaseLettersPrecomputed: Set<UInt32> = {
335-
var result = Set<UInt32>()
336-
for i in 0...0x0010_FFFF {
337-
guard let scalar = UnicodeScalar(i) else { continue }
338-
if uppercaseLetters.contains(scalar) {
339-
result.insert(scalar.value)
340-
}
341-
}
342-
return result
343-
}()
294+
var uppercaseLettersPrecomputed: Set<UInt32> = precompute(uppercaseLetters)
344295
func isUppercasePrecomputed(_ c: Character) -> Bool {
345296
return uppercaseLettersPrecomputed.contains(c.firstScalar.value)
346297
}
347-
var decimalDigitsPrecomputed: Set<UInt32> = {
348-
var result = Set<UInt32>()
349-
for i in 0...0x0010_FFFF {
350-
guard let scalar = UnicodeScalar(i) else { continue }
351-
if decimalDigits.contains(scalar) {
352-
result.insert(scalar.value)
353-
}
354-
}
355-
return result
356-
}()
298+
var decimalDigitsPrecomputed: Set<UInt32> = precompute(decimalDigits)
357299
func isDecimalPrecomputed(_ c: Character) -> Bool {
358300
return decimalDigitsPrecomputed.contains(c.firstScalar.value)
359301
}
360-
var newlinesPrecomputed: Set<UInt32> = {
361-
var result = Set<UInt32>()
362-
for i in 0...0x0010_FFFF {
363-
guard let scalar = UnicodeScalar(i) else { continue }
364-
if newlines.contains(scalar) {
365-
result.insert(scalar.value)
366-
}
367-
}
368-
return result
369-
}()
302+
var newlinesPrecomputed: Set<UInt32> = precompute(newlines)
370303
func isNewlinePrecomputed(_ c: Character) -> Bool {
371304
return newlinesPrecomputed.contains(c.firstScalar.value)
372305
}
373-
var capitalizedLettersPrecomputed: Set<UInt32> = {
374-
var result = Set<UInt32>()
375-
for i in 0...0x0010_FFFF {
376-
guard let scalar = UnicodeScalar(i) else { continue }
377-
if capitalizedLetters.contains(scalar) {
378-
result.insert(scalar.value)
379-
}
380-
}
381-
return result
382-
}()
306+
var capitalizedLettersPrecomputed: Set<UInt32> = precompute(capitalizedLetters)
383307
func isCapitalizedPrecomputed(_ c: Character) -> Bool {
384308
return capitalizedLettersPrecomputed.contains(c.firstScalar.value)
385309
}

benchmark/single-source/CharacterProperties.swift.gyb

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -104,17 +104,22 @@ func setupMemo() {
104104
}
105105

106106
// Precompute whole scalar set
107-
% for Property, Set in Properties.items():
108-
var ${Set}Precomputed: Set<UInt32> = {
107+
func precompute(_ charSet: CharacterSet) -> Set<UInt32> {
109108
var result = Set<UInt32>()
110-
for i in 0...0x0010_FFFF {
111-
guard let scalar = UnicodeScalar(i) else { continue }
112-
if ${Set}.contains(scalar) {
113-
result.insert(scalar.value)
109+
for plane in 0...0x10 {
110+
guard charSet.hasMember(inPlane: UInt8(plane)) else { continue }
111+
let offset = plane &* 0x1_0000
112+
for codePoint in 0...0xFFFF {
113+
guard let scalar = UnicodeScalar(codePoint &+ offset) else { continue }
114+
if charSet.contains(scalar) {
115+
result.insert(scalar.value)
116+
}
114117
}
115118
}
116119
return result
117-
}()
120+
}
121+
% for Property, Set in Properties.items():
122+
var ${Set}Precomputed: Set<UInt32> = precompute(${Set})
118123
func is${Property}Precomputed(_ c: Character) -> Bool {
119124
return ${Set}Precomputed.contains(c.firstScalar.value)
120125
}

0 commit comments

Comments
 (0)