Skip to content

[stdlib] String: Fix forward implementation of grapheme breaking rule 11 #63043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions stdlib/public/core/StringGraphemeBreaking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -723,11 +723,17 @@ extension _GraphemeBreakingState {
case (_, .extend),
(_, .zwj):

// If we're currently in an emoji sequence, then extends and ZWJ help
// continue the grapheme cluster by combining more scalars later. If we're
// not currently in an emoji sequence, but our lhs scalar is a pictograph,
// then that's a signal that it's the start of an emoji sequence.
if self.isInEmojiSequence || x == .extendedPictographic {
// Prepare for recognizing GB11, by remembering if we're in an emoji
// sequence.
//
// GB11: Extended_Pictographic Extend* ZWJ × Extended_Pictographic
//
// If our left-side scalar is a pictograph, then it starts a new emoji
// sequence; the sequence continues through subsequent extend/extend and
// extend/zwj pairs.
if (
x == .extendedPictographic || (self.isInEmojiSequence && x == .extend)
) {
enterEmojiSequence = true
}

Expand Down
51 changes: 36 additions & 15 deletions validation-test/stdlib/StringGraphemeBreaking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,25 @@ extension String {
}
}

/// Asserts that `string` yields exactly `pieces` from both its
/// `forwardPieces` and its `backwardPieces` properties.
///
/// Failures are attributed to the caller's source location through the
/// forwarded `file` and `line` arguments.
///
/// - Parameters:
///   - string: The string under test.
///   - pieces: The expected pieces, each given as an array of scalars.
///   - file: Source file reported on failure; defaults to the call site.
///   - line: Source line reported on failure; defaults to the call site.
func check(
  _ string: String,
  _ pieces: [[Unicode.Scalar]],
  file: String = #file, line: UInt = #line
) {
  // Forward direction first; the message tags which direction failed.
  let forward = string.forwardPieces
  expectEqual(
    forward, pieces,
    "string: \(String(reflecting: string)) (forward)",
    file: file, line: line)

  // Same expectation for the backward direction.
  let backward = string.backwardPieces
  expectEqual(
    backward, pieces,
    "string: \(String(reflecting: string)) (backward)",
    file: file, line: line)
}

if #available(SwiftStdlib 5.6, *) {
StringGraphemeBreaking.test("grapheme breaking") {
for test in graphemeBreakTests {
expectEqual(
test.string.forwardPieces, test.pieces,
"string: \(String(reflecting: test.string)) (forward)")
expectEqual(
test.string.backwardPieces, test.pieces,
"string: \(String(reflecting: test.string)) (backward)")
check(test.string, test.pieces)
}
}
}
Expand All @@ -65,8 +75,8 @@ class NonContiguousNSString: NSString {
super.init()
}

init(_ value: [UInt16]) {
_value = value
init(_ value: some Sequence<UInt16>) {
_value = Array(value)
super.init()
}

Expand Down Expand Up @@ -95,16 +105,27 @@ extension _StringGuts {
if #available(SwiftStdlib 5.6, *) {
StringGraphemeBreaking.test("grapheme breaking foreign") {
for test in graphemeBreakTests {
let foreign = NonContiguousNSString(Array(test.string.utf16))
let foreign = NonContiguousNSString(test.string.utf16)
let string = foreign as String

expectTrue(string._guts._isForeign())
expectEqual(
string.forwardPieces, test.pieces,
"string: \(String(reflecting: test.string)) (forward)")
expectEqual(
string.backwardPieces, test.pieces,
"string: \(String(reflecting: test.string)) (backward)")
check(string, test.pieces)
}
}
}

if #available(SwiftStdlib 5.8, *) {
  StringGraphemeBreaking.test("GB11") {
    // GB11: Extended_Pictographic Extend* ZWJ × Extended_Pictographic
    //
    // Input is MAN, ZERO WIDTH JOINER, ZERO WIDTH JOINER, GIRL. The expected
    // result keeps both joiners attached to MAN and places GIRL in a piece of
    // its own.
    let man: Unicode.Scalar = "\u{1f468}"
    let zwj: Unicode.Scalar = "\u{200d}"
    let girl: Unicode.Scalar = "\u{1f467}"
    let expected: [[Unicode.Scalar]] = [[man, zwj, zwj], [girl]]

    let native = "\u{1f468}\u{200d}\u{200d}\u{1f467}"
    check(native, expected)

    // Repeat the check on the same UTF-16 contents bridged from a
    // NonContiguousNSString, after confirming the result is a foreign string.
    let bridged = NonContiguousNSString(native.utf16) as String
    expectTrue(bridged._guts._isForeign())
    check(bridged, expected)
  }
}