@@ -125,11 +125,32 @@ public struct Character :
125
125
utf8CodeUnitCount: Builtin . Word ,
126
126
isASCII: Builtin . Int1
127
127
) {
128
- self = Character (
129
- String (
130
- _builtinExtendedGraphemeClusterLiteral: start,
131
- utf8CodeUnitCount: utf8CodeUnitCount,
132
- isASCII: isASCII) )
128
+ // Most character literals are going to be fewer than eight UTF-8 code
129
+ // units; for those, build the small character representation directly.
130
+ let maxCodeUnitCount = MemoryLayout< UInt64> . size
131
+ if _fastPath ( Int ( utf8CodeUnitCount) <= maxCodeUnitCount) {
132
+ var buffer : UInt64 = ~ 0
133
+ _memcpy (
134
+ dest: UnsafeMutableRawPointer ( Builtin . addressof ( & buffer) ) ,
135
+ src: UnsafeMutableRawPointer ( start) ,
136
+ size: UInt ( utf8CodeUnitCount) )
137
+ // Copying the bytes directly from the literal into an integer assumes
138
+ // little endianness, so convert the copied data into host endianness.
139
+ let utf8Chunk = UInt64 ( littleEndian: buffer)
140
+ let bits = maxCodeUnitCount &* 8 &- 1
141
+ // Verify that the highest bit is set (the buffer starts as all ones, so it
142
+ // stays set unless an eighth byte cleared it) before truncating to 63 bits.
143
+ if _fastPath ( utf8Chunk & ( 1 << numericCast ( bits) ) != 0 ) {
144
+ _representation = . small( Builtin . trunc_Int64_Int63 ( utf8Chunk. _value) )
145
+ return
146
+ }
147
+ }
148
+ // For anything that doesn't fit in 63 bits, build the large
149
+ // representation.
150
+ self = Character ( _largeRepresentationString: String (
151
+ _builtinExtendedGraphemeClusterLiteral: start,
152
+ utf8CodeUnitCount: utf8CodeUnitCount,
153
+ isASCII: isASCII) )
133
154
}
134
155
135
156
/// Creates a character with the specified value.
@@ -183,15 +204,21 @@ public struct Character :
183
204
_representation = . small( Builtin . trunc_Int64_Int63 ( initialUTF8. _value) )
184
205
}
185
206
else {
186
- if let native = s. _core. nativeBuffer,
187
- native. start == s. _core. _baseAddress! {
188
- _representation = . large( native. _storage)
189
- return
190
- }
191
- var nativeString = " "
192
- nativeString. append ( s)
193
- _representation = . large( nativeString. _core. nativeBuffer!. _storage)
207
+ self = Character ( _largeRepresentationString: s)
208
+ }
209
+ }
210
+
211
+ /// Creates a Character in the large representation from a String that the caller already knows requires it.
212
+ /// Reuses the string's native storage when its native buffer starts at the core's base address; otherwise appends `s` to a new string and uses that string's storage.
213
+ internal init ( _largeRepresentationString s: String ) {
214
+ if let native = s. _core. nativeBuffer,
215
+ native. start == s. _core. _baseAddress! {
216
+ _representation = . large( native. _storage) // reuse the storage already backing `s`
217
+ return
194
218
}
219
+ var nativeString = " "
220
+ nativeString. append ( s) // NOTE(review): the force-unwrap below relies on this yielding a native buffer — confirm
221
+ _representation = . large( nativeString. _core. nativeBuffer!. _storage)
195
222
}
196
223
197
224
/// Returns the index of the lowest byte that is 0xFF, or 8 if
0 commit comments