Skip to content

Commit 8ef98b8

Browse files
authored
Merge pull request swiftlang#16119 from milseman/unicode_mistake
[string] Comparison bug fix: Kelvin
2 parents 0799683 + ebbfd8c commit 8ef98b8

File tree

2 files changed

+25
-17
lines changed

2 files changed

+25
-17
lines changed

stdlib/public/core/StringComparison.swift

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -937,26 +937,29 @@ extension _UnmanagedString where CodeUnit == UInt8 {
937937
if _fastPath(
938938
other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII
939939
) {
940-
return .less
941-
}
942-
} else {
943-
let selfASCIIChar = UInt16(self[idx])
944-
_sanityCheck(selfASCIIChar != otherCU, "should be different")
945-
if idx+1 == other.count {
946-
return _lexicographicalCompare(selfASCIIChar, otherCU)
947-
}
948-
if _fastPath(other.hasNormalizationBoundary(after: idx)) {
949-
return _lexicographicalCompare(selfASCIIChar, otherCU)
940+
return .less
950941
}
942+
943+
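// Rare pathological case: a non-ASCII scalar that is not normalized super-ASCII (e.g. U+212A KELVIN SIGN, which normalizes to ASCII "K"), so fall back to comparing normalized code units.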
944+
var selfIterator = _NormalizedCodeUnitIterator(self)
945+
return selfIterator.compare(with: _NormalizedCodeUnitIterator(other))
946+
}
947+
948+
let selfASCIIChar = UInt16(self[idx])
949+
_sanityCheck(selfASCIIChar != otherCU, "should be different")
950+
if idx+1 == other.count {
951+
return _lexicographicalCompare(selfASCIIChar, otherCU)
952+
}
953+
if _fastPath(other.hasNormalizationBoundary(after: idx)) {
954+
return _lexicographicalCompare(selfASCIIChar, otherCU)
951955
}
952956

953957
//
954958
// Otherwise, need to normalize the segment and then compare
955959
//
956-
let selfASCIIChar = UInt16(self[idx])
957960
return _compareStringsPostSuffix(
958-
selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...]
959-
)
961+
selfASCIIChar: selfASCIIChar, otherUTF16WithLeadingASCII: other[idx...]
962+
)
960963
}
961964
}
962965

@@ -1008,15 +1011,17 @@ extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
10081011
}
10091012
}
10101013

1014+
@inline(never) // @outlined
10111015
private func _compareStringsPostSuffix(
10121016
selfASCIIChar: UInt16,
1013-
otherUTF16: _UnmanagedString<UInt16>
1017+
otherUTF16WithLeadingASCII: _UnmanagedString<UInt16>
10141018
) -> _Ordering {
1015-
let otherCU = otherUTF16[0]
1019+
let otherCU = otherUTF16WithLeadingASCII[0]
10161020
_sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call")
10171021

1018-
let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0)
1019-
let segment = otherUTF16[..<segmentEndIdx]
1022+
let segmentEndIdx = otherUTF16WithLeadingASCII._findNormalizationSegmentEnd(
1023+
startingFrom: 0)
1024+
let segment = otherUTF16WithLeadingASCII[..<segmentEndIdx]
10201025

10211026
// Fast path: If prenormal, we're done.
10221027
if _Normalization._prenormalQuickCheckYes(segment) {

test/stdlib/StringAPI.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ let tests = [
135135
ComparisonTest(.eq, "\u{0301}", "\u{0341}"),
136136
ComparisonTest(.lt, "\u{0301}", "\u{0954}"),
137137
ComparisonTest(.lt, "\u{0341}", "\u{0954}"),
138+
139+
// U+212A KELVIN SIGN normalizes to ASCII "K", so the two must compare equal.
140+
ComparisonTest(.eq, "K", "\u{212A}"),
138141
]
139142

140143
func checkStringComparison(

0 commit comments

Comments
 (0)