Skip to content

Commit bd5189c

Browse files
committed
[String] Grapheme fast paths for punctuation: 5-8x speedup.
Many strings use punctuation characters that are not below 0x300 (e.g. Unicode hyphens and CJK quotes). These can cause grapheme breaking to switch back and forth between its fast and slow paths. Add fast paths for general punctuation characters and for CJK punctuation and symbol characters. This results in roughly a 5-8x speedup for heavily (Unicode) punctuated Latiny and CJKy workloads.
1 parent 7580d6a commit bd5189c

File tree

4 files changed

+207
-25
lines changed

4 files changed

+207
-25
lines changed

benchmark/single-source/StringWalk.swift

Lines changed: 176 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,11 @@
1616
// scripts/generate_harness/generate_harness.py to regenerate this file.
1717
////////////////////////////////////////////////////////////////////////////////
1818

19-
20-
// Test String subscript performance.
2119
//
22-
// Subscript has a slow path that initializes a global variable:
23-
// Swift._cocoaStringSubscript.addressor. Global optimization would
24-
// normally hoist the initializer outside the inner loop (over
25-
// unicodeScalars), forcing the initializer to be called on each
26-
// lap. However, now that the cocoa code is properly marked "slowPath",
27-
// no hoisting should occur.
20+
// Test String iteration performance over a variety of workloads, languages,
21+
// and symbols.
22+
//
23+
2824
import TestsUtils
2925

3026
var count: Int = 0
@@ -70,6 +66,8 @@ let japanese = "今回のアップデートでSwiftに大幅な改良が施さ
7066
let chinese = "Swift 是面向 Apple 平台的编程语言,功能强大且直观易用,而本次更新对其进行了全面优化。"
7167
let korean = "이번 업데이트에서는 강력하면서도 직관적인 Apple 플랫폼용 프로그래밍 언어인 Swift를 완벽히 개선하였습니다."
7268
let russian = "в чащах юга жил-был цитрус? да, но фальшивый экземпляр"
69+
let punctuated = "\u{201c}Hello\u{2010}world\u{2026}\u{201d}"
70+
let punctuatedJapanese = "\u{300c}\u{300e}今日は\u{3001}世界\u{3002}\u{300f}\u{300d}"
7371

7472
// A workload that's mostly Latin characters, with occasional emoji
7573
// interspersed. Common for tweets.
@@ -91,7 +89,6 @@ let unicodeScalarsMultiplier = baseMultiplier
9189
let charactersMultiplier = baseMultiplier / 5
9290

9391

94-
9592
@inline(never)
9693
public func run_StringWalk_ascii_unicodeScalars(_ N: Int) {
9794
for _ in 1...unicodeScalarsMultiplier*N {
@@ -177,7 +174,6 @@ public func run_CharIndexing_ascii_unicodeScalars_Backwards(_ N: Int) {
177174

178175

179176

180-
181177
@inline(never)
182178
public func run_StringWalk_utf16_unicodeScalars(_ N: Int) {
183179
for _ in 1...unicodeScalarsMultiplier*N {
@@ -263,7 +259,6 @@ public func run_CharIndexing_utf16_unicodeScalars_Backwards(_ N: Int) {
263259

264260

265261

266-
267262
@inline(never)
268263
public func run_StringWalk_tweet_unicodeScalars(_ N: Int) {
269264
for _ in 1...unicodeScalarsMultiplier*N {
@@ -349,7 +344,6 @@ public func run_CharIndexing_tweet_unicodeScalars_Backwards(_ N: Int) {
349344

350345

351346

352-
353347
@inline(never)
354348
public func run_StringWalk_japanese_unicodeScalars(_ N: Int) {
355349
for _ in 1...unicodeScalarsMultiplier*N {
@@ -435,7 +429,6 @@ public func run_CharIndexing_japanese_unicodeScalars_Backwards(_ N: Int) {
435429

436430

437431

438-
439432
@inline(never)
440433
public func run_StringWalk_chinese_unicodeScalars(_ N: Int) {
441434
for _ in 1...unicodeScalarsMultiplier*N {
@@ -521,7 +514,6 @@ public func run_CharIndexing_chinese_unicodeScalars_Backwards(_ N: Int) {
521514

522515

523516

524-
525517
@inline(never)
526518
public func run_StringWalk_korean_unicodeScalars(_ N: Int) {
527519
for _ in 1...unicodeScalarsMultiplier*N {
@@ -607,7 +599,6 @@ public func run_CharIndexing_korean_unicodeScalars_Backwards(_ N: Int) {
607599

608600

609601

610-
611602
@inline(never)
612603
public func run_StringWalk_russian_unicodeScalars(_ N: Int) {
613604
for _ in 1...unicodeScalarsMultiplier*N {
@@ -692,3 +683,173 @@ public func run_CharIndexing_russian_unicodeScalars_Backwards(_ N: Int) {
692683

693684

694685

686+
687+
@inline(never)
688+
public func run_StringWalk_punctuated_unicodeScalars(_ N: Int) {
689+
for _ in 1...unicodeScalarsMultiplier*N {
690+
count_unicodeScalars(punctuated.unicodeScalars)
691+
}
692+
}
693+
694+
@inline(never)
695+
public func run_StringWalk_punctuated_unicodeScalars_Backwards(_ N: Int) {
696+
for _ in 1...unicodeScalarsMultiplier*N {
697+
count_unicodeScalars_rev(punctuated.unicodeScalars.reversed())
698+
}
699+
}
700+
701+
702+
703+
704+
@inline(never)
705+
public func run_StringWalk_punctuated_characters(_ N: Int) {
706+
for _ in 1...charactersMultiplier*N {
707+
count_characters(punctuated.characters)
708+
}
709+
}
710+
711+
@inline(never)
712+
public func run_StringWalk_punctuated_characters_Backwards(_ N: Int) {
713+
for _ in 1...charactersMultiplier*N {
714+
count_characters_rev(punctuated.characters.reversed())
715+
}
716+
}
717+
718+
719+
720+
721+
let punctuatedCharacters = Array(punctuated)
722+
723+
@inline(never)
724+
public func run_CharIteration_punctuated_unicodeScalars(_ N: Int) {
725+
for _ in 1...unicodeScalarsMultiplier*N {
726+
for c in punctuatedCharacters {
727+
for u in c.unicodeScalars {
728+
count |= Int(u.value)
729+
}
730+
}
731+
}
732+
}
733+
734+
@inline(never)
735+
public func run_CharIteration_punctuated_unicodeScalars_Backwards(_ N: Int) {
736+
for _ in 1...unicodeScalarsMultiplier*N {
737+
for c in punctuatedCharacters {
738+
for u in c.unicodeScalars.reversed() {
739+
count |= Int(u.value)
740+
}
741+
}
742+
}
743+
}
744+
745+
@inline(never)
746+
public func run_CharIndexing_punctuated_unicodeScalars(_ N: Int) {
747+
for _ in 1...unicodeScalarsMultiplier*N {
748+
for c in punctuatedCharacters {
749+
let s = c.unicodeScalars
750+
for i in s.indices {
751+
count |= Int(s[i].value)
752+
}
753+
}
754+
}
755+
}
756+
757+
@inline(never)
758+
public func run_CharIndexing_punctuated_unicodeScalars_Backwards(_ N: Int) {
759+
for _ in 1...unicodeScalarsMultiplier*N {
760+
for c in punctuatedCharacters {
761+
let s = c.unicodeScalars
762+
for i in s.indices.reversed() {
763+
count |= Int(s[i].value)
764+
}
765+
}
766+
}
767+
}
768+
769+
770+
771+
772+
@inline(never)
773+
public func run_StringWalk_punctuatedJapanese_unicodeScalars(_ N: Int) {
774+
for _ in 1...unicodeScalarsMultiplier*N {
775+
count_unicodeScalars(punctuatedJapanese.unicodeScalars)
776+
}
777+
}
778+
779+
@inline(never)
780+
public func run_StringWalk_punctuatedJapanese_unicodeScalars_Backwards(_ N: Int) {
781+
for _ in 1...unicodeScalarsMultiplier*N {
782+
count_unicodeScalars_rev(punctuatedJapanese.unicodeScalars.reversed())
783+
}
784+
}
785+
786+
787+
788+
789+
@inline(never)
790+
public func run_StringWalk_punctuatedJapanese_characters(_ N: Int) {
791+
for _ in 1...charactersMultiplier*N {
792+
count_characters(punctuatedJapanese.characters)
793+
}
794+
}
795+
796+
@inline(never)
797+
public func run_StringWalk_punctuatedJapanese_characters_Backwards(_ N: Int) {
798+
for _ in 1...charactersMultiplier*N {
799+
count_characters_rev(punctuatedJapanese.characters.reversed())
800+
}
801+
}
802+
803+
804+
805+
806+
let punctuatedJapaneseCharacters = Array(punctuatedJapanese)
807+
808+
@inline(never)
809+
public func run_CharIteration_punctuatedJapanese_unicodeScalars(_ N: Int) {
810+
for _ in 1...unicodeScalarsMultiplier*N {
811+
for c in punctuatedJapaneseCharacters {
812+
for u in c.unicodeScalars {
813+
count |= Int(u.value)
814+
}
815+
}
816+
}
817+
}
818+
819+
@inline(never)
820+
public func run_CharIteration_punctuatedJapanese_unicodeScalars_Backwards(_ N: Int) {
821+
for _ in 1...unicodeScalarsMultiplier*N {
822+
for c in punctuatedJapaneseCharacters {
823+
for u in c.unicodeScalars.reversed() {
824+
count |= Int(u.value)
825+
}
826+
}
827+
}
828+
}
829+
830+
@inline(never)
831+
public func run_CharIndexing_punctuatedJapanese_unicodeScalars(_ N: Int) {
832+
for _ in 1...unicodeScalarsMultiplier*N {
833+
for c in punctuatedJapaneseCharacters {
834+
let s = c.unicodeScalars
835+
for i in s.indices {
836+
count |= Int(s[i].value)
837+
}
838+
}
839+
}
840+
}
841+
842+
@inline(never)
843+
public func run_CharIndexing_punctuatedJapanese_unicodeScalars_Backwards(_ N: Int) {
844+
for _ in 1...unicodeScalarsMultiplier*N {
845+
for c in punctuatedJapaneseCharacters {
846+
let s = c.unicodeScalars
847+
for i in s.indices.reversed() {
848+
count |= Int(s[i].value)
849+
}
850+
}
851+
}
852+
}
853+
854+
855+

benchmark/single-source/StringWalk.swift.gyb

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,11 @@
1717
// scripts/generate_harness/generate_harness.py to regenerate this file.
1818
////////////////////////////////////////////////////////////////////////////////
1919

20-
21-
// Test String subscript performance.
2220
//
23-
// Subscript has a slow path that initializes a global variable:
24-
// Swift._cocoaStringSubscript.addressor. Global optimization would
25-
// normally hoist the initializer outside the inner loop (over
26-
// unicodeScalars), forcing the initializer to be called on each
27-
// lap. However, now that the cocoa code is properly marked "slowPath",
28-
// no hoisting should occur.
21+
// Test String iteration performance over a variety of workloads, languages,
22+
// and symbols.
23+
//
24+
2925
import TestsUtils
3026

3127
var count: Int = 0
@@ -71,6 +67,8 @@ let japanese = "今回のアップデートでSwiftに大幅な改良が施さ
7167
let chinese = "Swift 是面向 Apple 平台的编程语言,功能强大且直观易用,而本次更新对其进行了全面优化。"
7268
let korean = "이번 업데이트에서는 강력하면서도 직관적인 Apple 플랫폼용 프로그래밍 언어인 Swift를 완벽히 개선하였습니다."
7369
let russian = "в чащах юга жил-был цитрус? да, но фальшивый экземпляр"
70+
let punctuated = "\u{201c}Hello\u{2010}world\u{2026}\u{201d}"
71+
let punctuatedJapanese = "\u{300c}\u{300e}今日は\u{3001}世界\u{3002}\u{300f}\u{300d}"
7472

7573
// A workload that's mostly Latin characters, with occasional emoji
7674
// interspersed. Common for tweets.
@@ -91,8 +89,7 @@ let baseMultiplier = 10_000
9189
let unicodeScalarsMultiplier = baseMultiplier
9290
let charactersMultiplier = baseMultiplier / 5
9391

94-
% for Name in ["ascii", "utf16", "tweet", "japanese", "chinese", "korean", "russian"]:
95-
92+
% for Name in ["ascii", "utf16", "tweet", "japanese", "chinese", "korean", "russian", "punctuated", "punctuatedJapanese"]:
9693
% for Kind in ["unicodeScalars", "characters"]:
9794

9895
@inline(never)

benchmark/utils/main.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ addTo(&precommitTests, "CharIndexing_japanese_unicodeScalars", run_CharIndexing_
178178
addTo(&precommitTests, "CharIndexing_japanese_unicodeScalars_Backwards", run_CharIndexing_japanese_unicodeScalars_Backwards)
179179
addTo(&precommitTests, "CharIndexing_korean_unicodeScalars", run_CharIndexing_korean_unicodeScalars)
180180
addTo(&precommitTests, "CharIndexing_korean_unicodeScalars_Backwards", run_CharIndexing_korean_unicodeScalars_Backwards)
181+
addTo(&precommitTests, "CharIndexing_punctuatedJapanese_unicodeScalars", run_CharIndexing_punctuatedJapanese_unicodeScalars)
182+
addTo(&precommitTests, "CharIndexing_punctuatedJapanese_unicodeScalars_Backwards", run_CharIndexing_punctuatedJapanese_unicodeScalars_Backwards)
183+
addTo(&precommitTests, "CharIndexing_punctuated_unicodeScalars", run_CharIndexing_punctuated_unicodeScalars)
184+
addTo(&precommitTests, "CharIndexing_punctuated_unicodeScalars_Backwards", run_CharIndexing_punctuated_unicodeScalars_Backwards)
181185
addTo(&precommitTests, "CharIndexing_russian_unicodeScalars", run_CharIndexing_russian_unicodeScalars)
182186
addTo(&precommitTests, "CharIndexing_russian_unicodeScalars_Backwards", run_CharIndexing_russian_unicodeScalars_Backwards)
183187
addTo(&precommitTests, "CharIndexing_tweet_unicodeScalars", run_CharIndexing_tweet_unicodeScalars)
@@ -192,6 +196,10 @@ addTo(&precommitTests, "CharIteration_japanese_unicodeScalars", run_CharIteratio
192196
addTo(&precommitTests, "CharIteration_japanese_unicodeScalars_Backwards", run_CharIteration_japanese_unicodeScalars_Backwards)
193197
addTo(&precommitTests, "CharIteration_korean_unicodeScalars", run_CharIteration_korean_unicodeScalars)
194198
addTo(&precommitTests, "CharIteration_korean_unicodeScalars_Backwards", run_CharIteration_korean_unicodeScalars_Backwards)
199+
addTo(&precommitTests, "CharIteration_punctuatedJapanese_unicodeScalars", run_CharIteration_punctuatedJapanese_unicodeScalars)
200+
addTo(&precommitTests, "CharIteration_punctuatedJapanese_unicodeScalars_Backwards", run_CharIteration_punctuatedJapanese_unicodeScalars_Backwards)
201+
addTo(&precommitTests, "CharIteration_punctuated_unicodeScalars", run_CharIteration_punctuated_unicodeScalars)
202+
addTo(&precommitTests, "CharIteration_punctuated_unicodeScalars_Backwards", run_CharIteration_punctuated_unicodeScalars_Backwards)
195203
addTo(&precommitTests, "CharIteration_russian_unicodeScalars", run_CharIteration_russian_unicodeScalars)
196204
addTo(&precommitTests, "CharIteration_russian_unicodeScalars_Backwards", run_CharIteration_russian_unicodeScalars_Backwards)
197205
addTo(&precommitTests, "CharIteration_tweet_unicodeScalars", run_CharIteration_tweet_unicodeScalars)
@@ -563,6 +571,14 @@ addTo(&stringTests, "StringWalk_korean_characters", run_StringWalk_korean_charac
563571
addTo(&stringTests, "StringWalk_korean_characters_Backwards", run_StringWalk_korean_characters_Backwards)
564572
addTo(&stringTests, "StringWalk_korean_unicodeScalars", run_StringWalk_korean_unicodeScalars)
565573
addTo(&stringTests, "StringWalk_korean_unicodeScalars_Backwards", run_StringWalk_korean_unicodeScalars_Backwards)
574+
addTo(&stringTests, "StringWalk_punctuatedJapanese_characters", run_StringWalk_punctuatedJapanese_characters)
575+
addTo(&stringTests, "StringWalk_punctuatedJapanese_characters_Backwards", run_StringWalk_punctuatedJapanese_characters_Backwards)
576+
addTo(&stringTests, "StringWalk_punctuatedJapanese_unicodeScalars", run_StringWalk_punctuatedJapanese_unicodeScalars)
577+
addTo(&stringTests, "StringWalk_punctuatedJapanese_unicodeScalars_Backwards", run_StringWalk_punctuatedJapanese_unicodeScalars_Backwards)
578+
addTo(&stringTests, "StringWalk_punctuated_characters", run_StringWalk_punctuated_characters)
579+
addTo(&stringTests, "StringWalk_punctuated_characters_Backwards", run_StringWalk_punctuated_characters_Backwards)
580+
addTo(&stringTests, "StringWalk_punctuated_unicodeScalars", run_StringWalk_punctuated_unicodeScalars)
581+
addTo(&stringTests, "StringWalk_punctuated_unicodeScalars_Backwards", run_StringWalk_punctuated_unicodeScalars_Backwards)
566582
addTo(&stringTests, "StringWalk_russian_characters", run_StringWalk_russian_characters)
567583
addTo(&stringTests, "StringWalk_russian_characters_Backwards", run_StringWalk_russian_characters_Backwards)
568584
addTo(&stringTests, "StringWalk_russian_unicodeScalars", run_StringWalk_russian_unicodeScalars)

stdlib/public/core/StringCharacterView.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,14 @@ extension String.CharacterView : BidirectionalCollection {
344344
// 0xAC00–0xD7AF
345345
case 0xac00...0xd7af: return true
346346

347+
// Common general use punctuation, excluding extenders:
348+
// 0x2010-0x2029
349+
case 0x2010...0x2029: return true
350+
351+
// CJK symbols and punctuation characters, excluding extenders:
352+
// 0x3000-0x3029
353+
case 0x3000...0x3029: return true
354+
347355
default: return false
348356
}
349357
}

0 commit comments

Comments
 (0)