@@ -38,6 +38,13 @@ public func _stdlib_compareNSStringDeterministicUnicodeCollationPointer(
38
38
) -> Int32
39
39
#endif
40
40
41
/// Runtime entry point that compares `n` bytes starting at `s1` and `s2`.
///
/// Callers in this file treat the result as the byte offset of the first
/// mismatch, and a result equal to `n` as "no mismatch found" — which is not
/// the sign-based contract of C `memcmp`.
/// NOTE(review): the implementation is not visible here; confirm the return
/// convention against the runtime definition.
///
/// - Parameters:
///   - s1: Start of the first buffer.
///   - s2: Start of the second buffer.
///   - n: Number of bytes to compare.
@_silgen_name("_swift_string_memcmp")
func _swift_string_memcmp(
  _ s1: UnsafeMutableRawPointer,
  _ s2: UnsafeMutableRawPointer,
  _ n: Int
) -> Int
47
+
41
48
extension String {
42
49
#if _runtime(_ObjC)
43
50
/// This is consistent with Foundation, but incorrect as defined by Unicode.
@@ -65,11 +72,67 @@ extension String {
65
72
}
66
73
#endif
67
74
68
- /// Compares two strings with the Unicode Collation Algorithm.
75
/// Compares two ASCII-backed strings by scanning their raw bytes first.
///
/// Returns 0 when both strings have the same length and
/// `_swift_string_memcmp` reports no mismatch within it; otherwise defers to
/// `_compareString(_:offset:)`, passing the value that
/// `_swift_string_memcmp` returned as the offset.
private
func _compareCodeUnitsASCII(_ rhs: String) -> Int {
  let lhsCount = _core.count
  let rhsCount = rhs._core.count
  let sharedLength = min(lhsCount, rhsCount)

  let mismatch = _swift_string_memcmp(
    UnsafeMutableRawPointer(_core.startASCII),
    UnsafeMutableRawPointer(rhs._core.startASCII),
    sharedLength)

  // Anything other than "same length and no mismatch" needs the full
  // comparison to decide the ordering.
  guard lhsCount != rhsCount || mismatch != sharedLength else {
    return 0
  }
  return _compareString(rhs, offset: mismatch)
}
86
+
69
87
/// Compares two UTF-16-backed strings by scanning their raw bytes first.
///
/// Returns 0 when both strings have the same length and
/// `_swift_string_memcmp` reports no mismatch within it. Otherwise the
/// comparison is handed to `_compareString(_:offset:)`, resuming a few code
/// units before the first mismatch (see the comment in the body).
@inline(never)
@_semantics("stdlib_binary_only") // Hide the ICU dependency
private
func _compareCodeUnitsUTF16(_ rhs: String) -> Int {
  let lhsCount = _core.count
  let rhsCount = rhs._core.count

  // The scan works on raw bytes; shifting by `elementShift` presumably
  // converts the shared code-unit count into a byte count.
  let byteLength = min(lhsCount, rhsCount) << _core.elementShift
  let firstDiffByte = _swift_string_memcmp(
    UnsafeMutableRawPointer(_core.startUTF16),
    UnsafeMutableRawPointer(rhs._core.startUTF16),
    byteLength)
  if lhsCount == rhsCount && firstDiffByte == byteLength {
    return 0
  }

  // At this point we have to fall back to the UCA.
  // In order to properly order contractions and surrogate pairs we can't
  // invoke the UCA with UTF16 strings that start in the middle of a
  // contraction or surrogate pair. Rather than carry out a lot of expensive
  // operations to figure out if we're in the middle of a contraction or
  // surrogate pair, we simply step back a fixed number of code units, equal
  // to the longest possible contraction, or the length of a surrogate pair
  // (2), whichever is greater, minus 1 (while taking care that we don't step
  // back past the start of the strings).
  // This will produce a correct result at the cost of re-comparing a few
  // characters that we know are equal, which is likely much cheaper than
  // calculating a more precise number of code units to step back.
  //
  // The byte offset and the code-unit offset are kept in separate bindings
  // so the two units cannot be mixed up.
  let firstDiffUnit = firstDiffByte >> _core.elementShift
  let surrogateLength = 2
  let longestContraction =
    Int(_swift_stdlib_unicode_find_longest_contraction())
  let stepBack = max(longestContraction, surrogateLength) - 1
  let resumeOffset = max(firstDiffUnit - stepBack, 0)
  return _compareString(rhs, offset: resumeOffset)
}
116
+
117
/// Compares two strings, using a raw code-unit scan when possible.
///
/// The scan is used only when both strings agree on `isASCII` and both report
/// contiguous storage; every other combination goes straight to
/// `_compareString(_:)`.
public // @testable
func _compareCodeUnits(_ rhs: String) -> Int {
  guard _core.isASCII == rhs._core.isASCII,
        _core.hasContiguousStorage,
        rhs._core.hasContiguousStorage else {
    return _compareString(rhs)
  }
  if _core.isASCII {
    return _compareCodeUnitsASCII(rhs)
  }
  return _compareCodeUnitsUTF16(rhs)
}
125
+
126
/// Compares two strings with the Unicode Collation Algorithm.
///
/// Forwards to the `offset:` variant with an offset of zero, i.e. both
/// strings are compared from their start.
public // @testable
func _compareDeterministicUnicodeCollation(_ rhs: String) -> Int {
  let wholeStringOffset = 0
  return _compareDeterministicUnicodeCollation(rhs, offset: wholeStringOffset)
}
131
+
132
+ @inline ( never)
133
+ @_semantics ( " stdlib_binary_only " ) // Hide the CF/ICU dependency
134
+ public
135
+ func _compareDeterministicUnicodeCollation( _ rhs: String , offset: Int = 0 ) -> Int {
73
136
// Note: this operation should be consistent with equality comparison of
74
137
// Character.
75
138
#if _runtime(_ObjC)
@@ -95,18 +158,18 @@ extension String {
95
158
return - rhs. _compareDeterministicUnicodeCollation ( self )
96
159
case ( false , false ) :
97
160
return Int ( _swift_stdlib_unicode_compare_utf16_utf16 (
98
- _core. startUTF16, Int32 ( _core. count) ,
99
- rhs. _core. startUTF16, Int32 ( rhs. _core. count) ) )
161
+ _core. startUTF16 + offset , Int32 ( _core. count - offset ) ,
162
+ rhs. _core. startUTF16 + offset , Int32 ( rhs. _core. count - offset ) ) )
100
163
case ( true , true ) :
101
164
return Int ( _swift_stdlib_unicode_compare_utf8_utf8 (
102
- _core. startASCII, Int32 ( _core. count) ,
103
- rhs. _core. startASCII, Int32 ( rhs. _core. count) ) )
165
+ _core. startASCII + offset , Int32 ( _core. count - offset ) ,
166
+ rhs. _core. startASCII + offset , Int32 ( rhs. _core. count - offset ) ) )
104
167
}
105
168
#endif
106
169
}
107
170
108
171
public // @testable
109
- func _compareString( _ rhs: String ) -> Int {
172
+ func _compareString( _ rhs: String , offset : Int = 0 ) -> Int {
110
173
#if _runtime(_ObjC)
111
174
// We only want to perform this optimization on objc runtimes. Elsewhere,
112
175
// we will make it follow the unicode collation algorithm even for ASCII.
@@ -115,7 +178,7 @@ extension String {
115
178
return _compareASCII ( rhs)
116
179
}
117
180
#endif
118
- return _compareDeterministicUnicodeCollation ( rhs)
181
+ return _compareDeterministicUnicodeCollation ( rhs, offset : offset )
119
182
}
120
183
}
121
184
@@ -133,14 +196,16 @@ extension String : Equatable {
133
196
lhs. _core. startASCII, rhs. _core. startASCII,
134
197
rhs. _core. count) == 0
135
198
}
136
- #endif
137
199
return lhs. _compareString ( rhs) == 0
200
+ #else
201
+ return lhs. _compareCodeUnits ( rhs) == 0
202
+ #endif
138
203
}
139
204
}
140
205
141
206
extension String : Comparable {
  /// Returns `true` when `lhs` orders before `rhs`, i.e. when
  /// `lhs._compareCodeUnits(rhs)` yields a negative value.
  public static func < (lhs: String, rhs: String) -> Bool {
    let ordering = lhs._compareCodeUnits(rhs)
    return ordering < 0
  }
}
146
211
0 commit comments