Skip to content

Commit d3728ac

Browse files
Merge pull request #6850 from allevato/character-performance
[stdlib] Speed up Character.init significantly for small characters.
2 parents 92ef777 + 7741fb5 commit d3728ac

File tree

1 file changed

+40
-13
lines changed

1 file changed

+40
-13
lines changed

stdlib/public/core/Character.swift

Lines changed: 40 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -125,11 +125,32 @@ public struct Character :
125125
utf8CodeUnitCount: Builtin.Word,
126126
isASCII: Builtin.Int1
127127
) {
128-
self = Character(
129-
String(
130-
_builtinExtendedGraphemeClusterLiteral: start,
131-
utf8CodeUnitCount: utf8CodeUnitCount,
132-
isASCII: isASCII))
128+
// Most character literals are going to be fewer than eight UTF-8 code
129+
// units; for those, build the small character representation directly.
130+
let maxCodeUnitCount = MemoryLayout<UInt64>.size
131+
if _fastPath(Int(utf8CodeUnitCount) <= maxCodeUnitCount) {
132+
var buffer: UInt64 = ~0
133+
_memcpy(
134+
dest: UnsafeMutableRawPointer(Builtin.addressof(&buffer)),
135+
src: UnsafeMutableRawPointer(start),
136+
size: UInt(utf8CodeUnitCount))
137+
// Copying the bytes directly from the literal into an integer assumes
138+
// little endianness, so convert the copied data into host endianness.
139+
let utf8Chunk = UInt64(littleEndian: buffer)
140+
let bits = maxCodeUnitCount &* 8 &- 1
141+
// Verify that the highest bit is set, so that truncating the chunk to
142+
// 63 bits only drops a bit whose value is known to be 1.
143+
if _fastPath(utf8Chunk & (1 << numericCast(bits)) != 0) {
144+
_representation = .small(Builtin.trunc_Int64_Int63(utf8Chunk._value))
145+
return
146+
}
147+
}
148+
// For anything that doesn't fit in 63 bits, build the large
149+
// representation.
150+
self = Character(_largeRepresentationString: String(
151+
_builtinExtendedGraphemeClusterLiteral: start,
152+
utf8CodeUnitCount: utf8CodeUnitCount,
153+
isASCII: isASCII))
133154
}
134155

135156
/// Creates a character with the specified value.
@@ -183,15 +204,21 @@ public struct Character :
183204
_representation = .small(Builtin.trunc_Int64_Int63(initialUTF8._value))
184205
}
185206
else {
186-
if let native = s._core.nativeBuffer,
187-
native.start == s._core._baseAddress! {
188-
_representation = .large(native._storage)
189-
return
190-
}
191-
var nativeString = ""
192-
nativeString.append(s)
193-
_representation = .large(nativeString._core.nativeBuffer!._storage)
207+
self = Character(_largeRepresentationString: s)
208+
}
209+
}
210+
211+
/// Creates a Character from a String that is already known to require the
212+
/// large representation.
///
/// This initializer performs no small-representation check of its own; it
/// always stores `.large`. Callers are expected to have ruled out the small
/// representation beforehand.
///
/// - Parameter s: The string to store in the large representation.
213+
internal init(_largeRepresentationString s: String) {
// Reuse the string's existing native buffer when it has one and that
// buffer starts at the string's base address (presumably this means `s`
// is not a slice into the middle of the buffer -- TODO confirm).
214+
if let native = s._core.nativeBuffer,
215+
native.start == s._core._baseAddress! {
216+
_representation = .large(native._storage)
217+
return
194218
}
// Otherwise append `s` to a fresh empty string and use that string's
// native buffer as the storage.
// NOTE(review): the force unwrap below assumes the append always leaves
// `nativeString` backed by a native buffer -- confirm.
219+
var nativeString = ""
220+
nativeString.append(s)
221+
_representation = .large(nativeString._core.nativeBuffer!._storage)
195222
}
196223

197224
/// Returns the index of the lowest byte that is 0xFF, or 8 if

0 commit comments

Comments
 (0)