Skip to content

Commit 1edc3be

Browse files
authored
Merge pull request #17213 from milseman/utf8_cre8
[benchmark] Add String.init UTF-8 decoding benchmarks
2 parents 7938272 + 15e869b commit 1edc3be

File tree

1 file changed

+97
-16
lines changed

1 file changed

+97
-16
lines changed

benchmark/single-source/UTF8Decode.swift

Lines changed: 97 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,61 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
import TestsUtils
14+
import Foundation
15+
16+
/// Benchmark registrations for UTF-8 decoding.
///
/// Each entry pairs a benchmark name with the function that runs it; all of
/// them carry the same `.validation`, `.api` and `.String` tags.
public let UTF8Decode: [BenchmarkInfo] = [
  ("UTF8Decode", run_UTF8Decode),
  ("UTF8Decode_InitFromData", run_UTF8Decode_InitFromData),
  ("UTF8Decode_InitDecoding", run_UTF8Decode_InitDecoding),
  ("UTF8Decode_InitFromBytes", run_UTF8Decode_InitFromBytes),
  ("UTF8Decode_InitFromData_ascii", run_UTF8Decode_InitFromData_ascii),
  ("UTF8Decode_InitDecoding_ascii", run_UTF8Decode_InitDecoding_ascii),
  ("UTF8Decode_InitFromBytes_ascii", run_UTF8Decode_InitFromBytes_ascii),
].map { benchmarkName, benchmarkBody in
  BenchmarkInfo(
    name: benchmarkName,
    runFunction: benchmarkBody,
    tags: [.validation, .api, .String])
}
46+
47+
// 1-byte sequences
48+
// This test case is the longest as it's the most performance sensitive.
49+
let ascii = "Swift is a multi-paradigm, compiled programming language created for iOS, OS X, watchOS, tvOS and Linux development by Apple Inc. Swift is designed to work with Apple's Cocoa and Cocoa Touch frameworks and the large body of existing Objective-C code written for Apple products. Swift is intended to be more resilient to erroneous code (\"safer\") than Objective-C and also more concise. It is built with the LLVM compiler framework included in Xcode 6 and later and uses the Objective-C runtime, which allows C, Objective-C, C++ and Swift code to run within a single program."
50+
// The UTF-8 code units of `ascii`, and the same bytes wrapped in a `Data`.
let asciiBytes = [UInt8](ascii.utf8)
let asciiData = Data(asciiBytes)
52+
53+
// 2-byte sequences
54+
let russian = "Ру́сский язы́к один из восточнославянских языков, национальный язык русского народа."
55+
// 3-byte sequences
56+
let japanese = "日本語(にほんご、にっぽんご)は、主に日本国内や日本人同士の間で使われている言語である。"
57+
// 4-byte sequences
58+
// Most commonly emoji, which are usually mixed with other text.
59+
let emoji = "Panda 🐼, Dog 🐶, Cat 🐱, Mouse 🐭."
60+
61+
// UTF-8 code units of every sample string, one byte array per string.
let allStrings: [[UInt8]] = [ascii, russian, japanese, emoji].map { sample in
  [UInt8](sample.utf8)
}
// The per-string arrays concatenated in order into one buffer, and the same
// bytes wrapped in a `Data`.
let allStringsBytes = allStrings.flatMap { $0 }
let allStringsData = Data(allStringsBytes)
1464

15-
public let UTF8Decode = BenchmarkInfo(
16-
name: "UTF8Decode",
17-
runFunction: run_UTF8Decode,
18-
tags: [.validation, .api, .String])
1965

2066
@inline(never)
2167
public func run_UTF8Decode(_ N: Int) {
22-
// 1-byte sequences
23-
// This test case is the longest as it's the most performance sensitive.
24-
let ascii = "Swift is a multi-paradigm, compiled programming language created for iOS, OS X, watchOS, tvOS and Linux development by Apple Inc. Swift is designed to work with Apple's Cocoa and Cocoa Touch frameworks and the large body of existing Objective-C code written for Apple products. Swift is intended to be more resilient to erroneous code (\"safer\") than Objective-C and also more concise. It is built with the LLVM compiler framework included in Xcode 6 and later and uses the Objective-C runtime, which allows C, Objective-C, C++ and Swift code to run within a single program."
25-
// 2-byte sequences
26-
let russian = "Ру́сский язы́к один из восточнославянских языков, национальный язык русского народа."
27-
// 3-byte sequences
28-
let japanese = "日本語(にほんご、にっぽんご)は、主に日本国内や日本人同士の間で使われている言語である。"
29-
// 4-byte sequences
30-
// Most commonly emoji, which are usually mixed with other text.
31-
let emoji = "Panda 🐼, Dog 🐶, Cat 🐱, Mouse 🐭."
32-
33-
let strings = [ascii, russian, japanese, emoji].map { Array($0.utf8) }
68+
let strings = allStrings
3469

3570
func isEmpty(_ result: UnicodeDecodingResult) -> Bool {
3671
switch result {
@@ -49,3 +84,49 @@ public func run_UTF8Decode(_ N: Int) {
4984
}
5085
}
5186
}
87+
88+
/// Builds a `String` from `allStringsData` with `String(data:encoding:)`,
/// `200 * N` times, passing every (optional) result to `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitFromData(_ N: Int) {
  let payload = allStringsData
  let iterations = 200 * N
  for _ in 0..<iterations {
    let decoded = String(data: payload, encoding: .utf8)
    blackHole(decoded)
  }
}
95+
/// Builds a `String` from `allStringsBytes` with `String(decoding:as:)`,
/// `200 * N` times, passing every result to `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitDecoding(_ N: Int) {
  let payload = allStringsBytes
  let iterations = 200 * N
  for _ in 0..<iterations {
    let decoded = String(decoding: payload, as: UTF8.self)
    blackHole(decoded)
  }
}
102+
/// Builds a `String` from `allStringsBytes` with `String(bytes:encoding:)`,
/// `200 * N` times, passing every (optional) result to `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitFromBytes(_ N: Int) {
  let payload = allStringsBytes
  let iterations = 200 * N
  for _ in 0..<iterations {
    let decoded = String(bytes: payload, encoding: .utf8)
    blackHole(decoded)
  }
}
109+
110+
/// Builds a `String` from the ASCII-only `asciiData` with
/// `String(data:encoding:)`, `1_000 * N` times, passing every (optional)
/// result to `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitFromData_ascii(_ N: Int) {
  let payload = asciiData
  let iterations = 1_000 * N
  for _ in 0..<iterations {
    let decoded = String(data: payload, encoding: .utf8)
    blackHole(decoded)
  }
}
117+
/// Builds a `String` from the ASCII-only `asciiBytes` with
/// `String(decoding:as:)`, `1_000 * N` times, passing every result to
/// `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitDecoding_ascii(_ N: Int) {
  let payload = asciiBytes
  let iterations = 1_000 * N
  for _ in 0..<iterations {
    let decoded = String(decoding: payload, as: UTF8.self)
    blackHole(decoded)
  }
}
124+
/// Builds a `String` from the ASCII-only `asciiBytes` with
/// `String(bytes:encoding:)`, `1_000 * N` times, passing every (optional)
/// result to `blackHole`.
///
/// - Parameter N: Workload multiplier supplied by the caller.
@inline(never)
public func run_UTF8Decode_InitFromBytes_ascii(_ N: Int) {
  let payload = asciiBytes
  let iterations = 1_000 * N
  for _ in 0..<iterations {
    let decoded = String(bytes: payload, encoding: .utf8)
    blackHole(decoded)
  }
}
131+
132+

0 commit comments

Comments
 (0)