Skip to content

Commit 5c19427

Browse files
authored
Merge pull request #22560 from milseman/name_brand_is_better
[String] Speed up UTF-8 initialization from non-stdlib types
2 parents e53b719 + 8672455 commit 5c19427

File tree

3 files changed

+30
-11
lines changed

3 files changed

+30
-11
lines changed

benchmark/single-source/DataBenchmarks.swift

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,9 @@ public let DataBenchmarks = [
164164
BenchmarkInfo(name: "DataToStringMedium",
165165
runFunction: { string($0*200, from: mediumData) }, tags: d,
166166
legacyFactor: 50),
167+
BenchmarkInfo(name: "DataToStringLargeUnicode",
168+
runFunction: { string($0*200, from: largeUnicodeData) }, tags: d,
169+
legacyFactor: 50),
167170

168171
BenchmarkInfo(name: "StringToDataEmpty",
169172
runFunction: { data($0*200, from: emptyString) }, tags: d,
@@ -174,6 +177,9 @@ public let DataBenchmarks = [
174177
BenchmarkInfo(name: "StringToDataMedium",
175178
runFunction: { data($0*200, from: mediumString) }, tags: d,
176179
legacyFactor: 50),
180+
BenchmarkInfo(name: "StringToDataLargeUnicode",
181+
runFunction: { data($0*200, from: largeUnicodeString) }, tags: d,
182+
legacyFactor: 50),
177183

178184
BenchmarkInfo(name: "Data.hash.Empty",
179185
runFunction: { hash($0*10_000, data: Data()) }, tags: d),
@@ -186,9 +192,12 @@ public let DataBenchmarks = [
186192
let emptyString = ""
187193
let smallString = "\r\n"
188194
let mediumString = "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n"
195+
let largeUnicodeString =
196+
"Swiftに大幅な改良が施され、𓀀𓀁𓀂𓀃, 🇺🇸🇨🇦🇲🇽" + mediumString
189197
let emptyData = Data()
190198
let smallData = Data(smallString.utf8)
191199
let mediumData = Data(mediumString.utf8)
200+
let largeUnicodeData = Data(largeUnicodeString.utf8)
192201

193202
let small = sampleData(.small)
194203
let medium = sampleData(.medium)

stdlib/public/core/String.swift

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -401,8 +401,13 @@ extension String {
401401
public init<C: Collection, Encoding: Unicode.Encoding>(
402402
decoding codeUnits: C, as sourceEncoding: Encoding.Type
403403
) where C.Iterator.Element == Encoding.CodeUnit {
404+
guard _fastPath(sourceEncoding == UTF8.self) else {
405+
self = String._fromCodeUnits(
406+
codeUnits, encoding: sourceEncoding, repair: true)!.0
407+
return
408+
}
409+
404410
if let contigBytes = codeUnits as? _HasContiguousBytes,
405-
sourceEncoding == UTF8.self,
406411
contigBytes._providesContiguousBytesNoCopy
407412
{
408413
self = contigBytes.withUnsafeBytes { rawBufPtr in
@@ -414,8 +419,13 @@ extension String {
414419
return
415420
}
416421

417-
self = String._fromCodeUnits(
418-
codeUnits, encoding: sourceEncoding, repair: true)!.0
422+
// Just copying to an Array is significantly faster than performing
423+
// generic operations
424+
self = Array(codeUnits).withUnsafeBufferPointer {
425+
let raw = UnsafeRawBufferPointer($0)
426+
return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self)).0
427+
}
428+
return
419429
}
420430

421431
/// Calls the given closure with a pointer to the contents of the string,

stdlib/public/core/StringObject.swift

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -499,20 +499,20 @@ extension _StringObject {
499499
├───┬───┬───┬───┬───┬───┬───┬───┼───┬───┬────┬────┬────┬────┬────┬────────────┤
500500
│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │ 12 │ 13 │ 14 │ 15 │
501501
├───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────┼────────────┤
502-
│ a │ b │ c │ d │ e │ f │ g │ h │ i │ j │ k │ l │ m │ n │ o │ 1x0x count │
502+
│ a │ b │ c │ d │ e │ f │ g │ h │ i │ j │ k │ l │ m │ n │ o │ 1x10 count │
503503
└───┴───┴───┴───┴───┴───┴───┴───┴───┴───┴────┴────┴────┴────┴────┴────────────┘
504504

505505
On 32-bit platforms, we have less space to store code units, and it isn't
506506
contiguous. However, we still use the above layout for the RawBitPattern
507507
representation.
508508

509-
┌───────────────┬───────────────────┬───────┬─────────┐
510-
│ _count │_variant .immortal │_discr │ _flags │
511-
├───┬───┬───┬───┼───┬───┬───┬───┬───┼───────┼────┬────┤
512-
│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │
513-
├───┼───┼───┼───┼───┴───┴───┴───┴───┼───────┼────┼────┤
514-
│ a │ b │ c │ d │ e f g h │x10 cnt│ i │ j │
515-
└───┴───┴───┴───┴───────────────────┴───────┴────┴────┘
509+
┌───────────────┬───────────────────┬───────┬─────────┐
510+
│ _count │_variant .immortal │ _discr │ _flags │
511+
├───┬───┬───┬───┼───┬───┬───┬───┬───┼───────┼────┬────┤
512+
│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │
513+
├───┼───┼───┼───┼───┴───┴───┴───┴───┼───────┼────┼────┤
514+
│ a │ b │ c │ d │ e f g h │1x10 cnt│ i │ j │
515+
└───┴───┴───┴───┴───────────────────┴───────┴────┴────┘
516516

517517
*/
518518
extension _StringObject {

0 commit comments

Comments
 (0)