Skip to content

Commit 6118ea5

Browse files
authored
Merge pull request #63043 from lorentey/gb11
[stdlib] String: Fix forward implementation of grapheme breaking rule 11
2 parents 09e8db2 + a3e517e commit 6118ea5

File tree

2 files changed

+47 additions, -20 deletions (lines changed)

2 files changed

+47 additions, -20 deletions (lines changed)

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -723,11 +723,17 @@ extension _GraphemeBreakingState {
723723
case (_, .extend),
724724
(_, .zwj):
725725

726-
// If we're currently in an emoji sequence, then extends and ZWJ help
727-
// continue the grapheme cluster by combining more scalars later. If we're
728-
// not currently in an emoji sequence, but our lhs scalar is a pictograph,
729-
// then that's a signal that it's the start of an emoji sequence.
730-
if self.isInEmojiSequence || x == .extendedPictographic {
726+
// Prepare for recognizing GB11, by remembering if we're in an emoji
727+
// sequence.
728+
//
729+
// GB11: Extended_Pictographic Extend* ZWJ × Extended_Pictographic
730+
//
731+
// If our left-side scalar is a pictograph, then it starts a new emoji
732+
// sequence; the sequence continues through subsequent extend/extend and
733+
// extend/zwj pairs.
734+
if (
735+
x == .extendedPictographic || (self.isInEmojiSequence && x == .extend)
736+
) {
731737
enterEmojiSequence = true
732738
}
733739

validation-test/stdlib/StringGraphemeBreaking.swift

Lines changed: 36 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -37,15 +37,25 @@ extension String {
3737
}
3838
}
3939

40+
func check(
41+
_ string: String,
42+
_ pieces: [[Unicode.Scalar]],
43+
file: String = #file, line: UInt = #line
44+
) {
45+
expectEqual(
46+
string.forwardPieces, pieces,
47+
"string: \(String(reflecting: string)) (forward)",
48+
file: file, line: line)
49+
expectEqual(
50+
string.backwardPieces, pieces,
51+
"string: \(String(reflecting: string)) (backward)",
52+
file: file, line: line)
53+
}
54+
4055
if #available(SwiftStdlib 5.6, *) {
4156
StringGraphemeBreaking.test("grapheme breaking") {
4257
for test in graphemeBreakTests {
43-
expectEqual(
44-
test.string.forwardPieces, test.pieces,
45-
"string: \(String(reflecting: test.string)) (forward)")
46-
expectEqual(
47-
test.string.backwardPieces, test.pieces,
48-
"string: \(String(reflecting: test.string)) (backward)")
58+
check(test.string, test.pieces)
4959
}
5060
}
5161
}
@@ -65,8 +75,8 @@ class NonContiguousNSString: NSString {
6575
super.init()
6676
}
6777

68-
init(_ value: [UInt16]) {
69-
_value = value
78+
init(_ value: some Sequence<UInt16>) {
79+
_value = Array(value)
7080
super.init()
7181
}
7282

@@ -95,16 +105,27 @@ extension _StringGuts {
95105
if #available(SwiftStdlib 5.6, *) {
96106
StringGraphemeBreaking.test("grapheme breaking foreign") {
97107
for test in graphemeBreakTests {
98-
let foreign = NonContiguousNSString(Array(test.string.utf16))
108+
let foreign = NonContiguousNSString(test.string.utf16)
99109
let string = foreign as String
100110

101111
expectTrue(string._guts._isForeign())
102-
expectEqual(
103-
string.forwardPieces, test.pieces,
104-
"string: \(String(reflecting: test.string)) (forward)")
105-
expectEqual(
106-
string.backwardPieces, test.pieces,
107-
"string: \(String(reflecting: test.string)) (backward)")
112+
check(string, test.pieces)
108113
}
109114
}
110115
}
116+
117+
if #available(SwiftStdlib 5.8, *) {
118+
StringGraphemeBreaking.test("GB11") {
119+
// MAN, ZERO WIDTH JOINER, ZERO WIDTH JOINER, GIRL
120+
let string = "\u{1f468}\u{200d}\u{200d}\u{1f467}"
121+
let pieces: [[Unicode.Scalar]] = [
122+
["\u{1f468}", "\u{200d}", "\u{200d}"],
123+
["\u{1f467}"]
124+
]
125+
check(string, pieces)
126+
127+
let foreign = NonContiguousNSString(string.utf16) as String
128+
expectTrue(foreign._guts._isForeign())
129+
check(foreign, pieces)
130+
}
131+
}

0 commit comments

Comments
 (0)