Skip to content

Commit 5c5a1e5

Browse files
author
Lance Parker
committed
Unified String comparison strategy for all platforms
1 parent d726280 commit 5c5a1e5

File tree

5 files changed

+1309
-162
lines changed

5 files changed

+1309
-162
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ set(SWIFTLIB_ESSENTIAL
124124
String.swift
125125
StringBridge.swift
126126
StringComparable.swift
127+
StringComparison.swift
127128
StringGuts.swift
128129
StringObject.swift
129130
StringIndex.swift
@@ -133,6 +134,7 @@ set(SWIFTLIB_ESSENTIAL
133134
StringStorage.swift
134135
StringSwitch.swift
135136
StringIndexConversions.swift
137+
StringNormalization.swift
136138
StringUnicodeScalarView.swift
137139
StringUTF16.swift
138140
StringUTF8.swift

stdlib/public/core/GroupInfo.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"StringBridge.swift",
1616
"StringCharacterView.swift",
1717
"StringComparable.swift",
18+
"StringComparison.swift",
1819
"StringObject.swift",
1920
"StringGuts.swift",
2021
"StringGraphemeBreaking.swift",
@@ -23,6 +24,7 @@
2324
"StringIndexConversions.swift",
2425
"StringInterpolation.swift",
2526
"StringLegacy.swift",
27+
"StringNormalization.swift",
2628
"StringRangeReplaceableCollection.swift",
2729
"StringStorage.swift",
2830
"StringSwitch.swift",

stdlib/public/core/StringComparable.swift

Lines changed: 20 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -12,148 +12,6 @@
1212

1313
import SwiftShims
1414

15-
#if _runtime(_ObjC)
16-
/// Compare two strings using the Unicode collation algorithm in the
17-
/// deterministic comparison mode. (The strings which are equivalent according
18-
/// to their NFD form are considered equal. Strings which are equivalent
19-
/// according to the plain Unicode collation algorithm are additionally ordered
20-
/// based on their NFD.)
21-
///
22-
/// See Unicode Technical Standard #10.
23-
///
24-
/// The behavior is equivalent to `NSString.compare()` with default options.
25-
///
26-
/// - returns:
27-
/// * an unspecified value less than zero if `lhs < rhs`,
28-
/// * zero if `lhs == rhs`,
29-
/// * an unspecified value greater than zero if `lhs > rhs`.
30-
@_inlineable // FIXME(sil-serialize-all)
31-
@_silgen_name("swift_stdlib_compareNSStringDeterministicUnicodeCollation")
32-
public func _stdlib_compareNSStringDeterministicUnicodeCollation(
33-
_ lhs: AnyObject, _ rhs: AnyObject
34-
) -> Int32
35-
36-
@_inlineable // FIXME(sil-serialize-all)
37-
@_silgen_name("swift_stdlib_compareNSStringDeterministicUnicodeCollationPtr")
38-
public func _stdlib_compareNSStringDeterministicUnicodeCollationPointer(
39-
_ lhs: OpaquePointer, _ rhs: OpaquePointer
40-
) -> Int32
41-
#endif
42-
43-
#if _runtime(_ObjC)
44-
extension _UnmanagedString where CodeUnit == UInt8 {
45-
/// This is consistent with Foundation, but incorrect as defined by Unicode.
46-
/// Unicode weights some ASCII punctuation in a different order than ASCII
47-
/// value, such as:
48-
///
49-
/// 0022 ; [*02FF.0020.0002] # QUOTATION MARK
50-
/// 0023 ; [*038B.0020.0002] # NUMBER SIGN
51-
/// 0025 ; [*038C.0020.0002] # PERCENT SIGN
52-
/// 0026 ; [*0389.0020.0002] # AMPERSAND
53-
/// 0027 ; [*02F8.0020.0002] # APOSTROPHE
54-
///
55-
@_inlineable // FIXME(sil-serialize-all)
56-
@_versioned
57-
internal func compareASCII(to other: _UnmanagedString<UInt8>) -> Int {
58-
// FIXME Results should be the same across all platforms.
59-
if self.start == other.start {
60-
return (self.count &- other.count).signum()
61-
}
62-
var cmp = Int(truncatingIfNeeded:
63-
_stdlib_memcmp(
64-
self.rawStart, other.rawStart,
65-
Swift.min(self.count, other.count)))
66-
if cmp == 0 {
67-
cmp = self.count &- other.count
68-
}
69-
return cmp.signum()
70-
}
71-
}
72-
#endif
73-
74-
extension _StringGuts {
75-
76-
//
77-
// FIXME(TODO: JIRA): HACK HACK HACK: Workaround for ARC :-(
78-
//
79-
@inline(never)
80-
@effects(readonly)
81-
public
82-
static func _compareDeterministicUnicodeCollation(
83-
_leftUnsafeStringGutsBitPattern leftBits: _RawBitPattern,
84-
_rightUnsafeStringGutsBitPattern rightBits: _RawBitPattern
85-
) -> Int {
86-
let left = _StringGuts(rawBits: leftBits)
87-
let right = _StringGuts(rawBits: rightBits)
88-
return _compareDeterministicUnicodeCollation(
89-
left, 0..<left.count, to: right, 0..<right.count)
90-
}
91-
@inline(never)
92-
@effects(readonly)
93-
public
94-
static func _compareDeterministicUnicodeCollation(
95-
_leftUnsafeStringGutsBitPattern leftBits: _RawBitPattern,
96-
_ leftRange: Range<Int>,
97-
_rightUnsafeStringGutsBitPattern rightBits: _RawBitPattern,
98-
_ rightRange: Range<Int>
99-
) -> Int {
100-
let left = _StringGuts(rawBits: leftBits)
101-
let right = _StringGuts(rawBits: rightBits)
102-
return _compareDeterministicUnicodeCollation(
103-
left, leftRange, to: right, rightRange)
104-
}
105-
106-
/// Compares two slices of strings with the Unicode Collation Algorithm.
107-
@inline(never) // Hide the CF/ICU dependency
108-
@effects(readonly)
109-
public // @testable
110-
static func _compareDeterministicUnicodeCollation(
111-
_ left: _StringGuts, _ leftRange: Range<Int>,
112-
to right: _StringGuts, _ rightRange: Range<Int>) -> Int {
113-
// Note: this operation should be consistent with equality comparison of
114-
// Character.
115-
#if _runtime(_ObjC)
116-
if _fastPath(left._isContiguous && right._isContiguous) {
117-
let l = _NSContiguousString(_unmanaged: left, range: leftRange)
118-
let r = _NSContiguousString(_unmanaged: right, range: rightRange)
119-
return l._unsafeWithNotEscapedSelfPointerPair(r) {
120-
return Int(
121-
_stdlib_compareNSStringDeterministicUnicodeCollationPointer($0, $1))
122-
}
123-
} else {
124-
let l = left._ephemeralCocoaString(leftRange)
125-
let r = right._ephemeralCocoaString(rightRange)
126-
return Int(_stdlib_compareNSStringDeterministicUnicodeCollation(l, r))
127-
}
128-
#else
129-
switch (left.isASCII, right.isASCII) {
130-
case (true, false):
131-
let l = left._unmanagedASCIIView[leftRange]
132-
let r = right._unmanagedUTF16View[rightRange]
133-
return Int(_swift_stdlib_unicode_compare_utf8_utf16(
134-
l.start, Int32(l.count),
135-
r.start, Int32(r.count)))
136-
case (false, true):
137-
// Just invert it and recurse for this case.
138-
return -_compareDeterministicUnicodeCollation(
139-
right, rightRange, to: left, leftRange)
140-
case (false, false):
141-
let l = left._unmanagedUTF16View[leftRange]
142-
let r = right._unmanagedUTF16View[rightRange]
143-
return Int(_swift_stdlib_unicode_compare_utf16_utf16(
144-
l.start, Int32(l.count),
145-
r.start, Int32(r.count)))
146-
case (true, true):
147-
let l = left._unmanagedASCIIView[leftRange]
148-
let r = right._unmanagedASCIIView[rightRange]
149-
return Int(_swift_stdlib_unicode_compare_utf8_utf8(
150-
l.start, Int32(l.count),
151-
r.start, Int32(r.count)))
152-
}
153-
#endif
154-
}
155-
}
156-
15715
extension _StringGuts {
15816
@inline(__always)
15917
@_inlineable
@@ -191,6 +49,10 @@ extension _StringGuts {
19149
internal static func isLess(
19250
_ left: _StringGuts, than right: _StringGuts
19351
) -> Bool {
52+
// Bitwise equality implies string equality, so `left` cannot be less than `right`
53+
if left._bitwiseEqualTo(right) {
54+
return false
55+
}
19456
return compare(left, to: right) == -1
19557
}
19658

@@ -200,6 +62,10 @@ extension _StringGuts {
20062
_ left: _StringGuts, _ leftRange: Range<Int>,
20163
than right: _StringGuts, _ rightRange: Range<Int>
20264
) -> Bool {
65+
// Bitwise-equal guts with identical ranges are the same slice, so `left` cannot be less than `right`
66+
if left._bitwiseEqualTo(right) && leftRange == rightRange {
67+
return false
68+
}
20369
return compare(left, leftRange, to: right, rightRange) == -1
20470
}
20571

@@ -211,22 +77,18 @@ extension _StringGuts {
21177
) -> Int {
21278
defer { _fixLifetime(left) }
21379
defer { _fixLifetime(right) }
214-
#if _runtime(_ObjC)
215-
// We only want to perform this optimization on objc runtimes. Elsewhere,
216-
// we will make it follow the unicode collation algorithm even for ASCII.
217-
// This is consistent with Foundation, but incorrect as defined by Unicode.
218-
//
219-
// FIXME: String ordering should be consistent across all platforms.
80+
22081
if left.isASCII && right.isASCII {
22182
let leftASCII = left._unmanagedASCIIView[leftRange]
22283
let rightASCII = right._unmanagedASCIIView[rightRange]
22384
let result = leftASCII.compareASCII(to: rightASCII)
22485
return result
22586
}
226-
#endif
227-
return _compareDeterministicUnicodeCollation(
228-
_leftUnsafeStringGutsBitPattern: left.rawBits, leftRange,
229-
_rightUnsafeStringGutsBitPattern: right.rawBits, rightRange)
87+
88+
let leftBits = left.rawBits
89+
let rightBits = right.rawBits
90+
91+
return _compareUnicode(leftBits, leftRange, rightBits, rightRange)
23092
}
23193

23294
@_inlineable
@@ -236,22 +98,18 @@ extension _StringGuts {
23698
) -> Int {
23799
defer { _fixLifetime(left) }
238100
defer { _fixLifetime(right) }
239-
#if _runtime(_ObjC)
240-
// We only want to perform this optimization on objc runtimes. Elsewhere,
241-
// we will make it follow the unicode collation algorithm even for ASCII.
242-
// This is consistent with Foundation, but incorrect as defined by Unicode.
243-
//
244-
// FIXME: String ordering should be consistent across all platforms.
101+
245102
if left.isASCII && right.isASCII {
246103
let leftASCII = left._unmanagedASCIIView
247104
let rightASCII = right._unmanagedASCIIView
248105
let result = leftASCII.compareASCII(to: rightASCII)
249106
return result
250107
}
251-
#endif
252-
return _compareDeterministicUnicodeCollation(
253-
_leftUnsafeStringGutsBitPattern: left.rawBits,
254-
_rightUnsafeStringGutsBitPattern: right.rawBits)
108+
109+
let leftBits = left.rawBits
110+
let rightBits = right.rawBits
111+
112+
return _compareUnicode(leftBits, rightBits)
255113
}
256114
}
257115

0 commit comments

Comments
 (0)