Skip to content

Commit 65ebcc8

Browse files
committed
Speed up Character.init for small characters.
This optimization checks to see if a Character can fit in the 63-bit small representation instead of unconditionally constructing a String and paying the cost of that allocation. If it does, the small representation is computed directly from its UTF-8 code units. In optimized builds, this turns Character literals <= 8 UTF-8 code units long into single 64-bit integer loads -- a huge improvement.
1 parent c49be07 commit 65ebcc8

File tree

1 file changed

+37
-13
lines changed

1 file changed

+37
-13
lines changed

stdlib/public/core/Character.swift

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,29 @@ public struct Character :
125125
utf8CodeUnitCount: Builtin.Word,
126126
isASCII: Builtin.Int1
127127
) {
128-
self = Character(
129-
String(
130-
_builtinExtendedGraphemeClusterLiteral: start,
131-
utf8CodeUnitCount: utf8CodeUnitCount,
132-
isASCII: isASCII))
128+
// Most character literals are going to be at most eight UTF-8 code
129+
// units; for those, build the small character representation directly.
130+
if _fastPath(Int(utf8CodeUnitCount) <= 8) {
131+
var buffer: UInt64 = ~0
132+
_memcpy(
133+
dest: UnsafeMutableRawPointer(Builtin.addressof(&buffer)),
134+
src: UnsafeMutableRawPointer(start),
135+
size: UInt(utf8CodeUnitCount))
136+
let utf8Chunk = UInt64(littleEndian: buffer)
137+
let bits = MemoryLayout.size(ofValue: utf8Chunk) &* 8 &- 1
138+
// Verify that the highest bit is set (it is whenever the top byte is 0xFF
// padding or a non-ASCII UTF-8 code unit) so that we can truncate it to
139+
// 63 bits.
140+
if _fastPath(utf8Chunk & (1 << numericCast(bits)) != 0) {
141+
_representation = .small(Builtin.trunc_Int64_Int63(utf8Chunk._value))
142+
return
143+
}
144+
}
145+
// For anything that doesn't fit in 63 bits, build the large
146+
// representation.
147+
self = Character(_largeRepresentationString: String(
148+
_builtinExtendedGraphemeClusterLiteral: start,
149+
utf8CodeUnitCount: utf8CodeUnitCount,
150+
isASCII: isASCII))
133151
}
134152

135153
/// Creates a character with the specified value.
@@ -183,15 +201,21 @@ public struct Character :
183201
_representation = .small(Builtin.trunc_Int64_Int63(initialUTF8._value))
184202
}
185203
else {
186-
if let native = s._core.nativeBuffer,
187-
native.start == s._core._baseAddress! {
188-
_representation = .large(native._storage)
189-
return
190-
}
191-
var nativeString = ""
192-
nativeString.append(s)
193-
_representation = .large(nativeString._core.nativeBuffer!._storage)
204+
self = Character(_largeRepresentationString: s)
205+
}
206+
}
207+
208+
/// Creates a Character from a String that is already known to require the
209+
/// large representation.
///
/// When `s` has a native buffer whose start matches the core's base address,
/// that buffer's storage is used directly for the large representation.
/// Otherwise the contents of `s` are appended to a new, empty string and
/// that string's native storage is used instead.
210+
internal init(_largeRepresentationString s: String) {
211+
if let native = s._core.nativeBuffer,
212+
native.start == s._core._baseAddress! {
213+
_representation = .large(native._storage)
214+
return
194215
}
216+
var nativeString = ""
217+
nativeString.append(s)
218+
_representation = .large(nativeString._core.nativeBuffer!._storage)
195219
}
196220

197221
/// Returns the index of the lowest byte that is 0xFF, or 8 if

0 commit comments

Comments
 (0)