Skip to content

Commit 0b307a3

Browse files
Make UTF-8 decode benchmark use more realistic cases
1 parent 6a5fc12 commit 0b307a3

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

benchmark/single-source/UTF8Decode.swift

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,24 @@ import TestsUtils
1414

1515
@inline(never)
1616
public func run_UTF8Decode(N: Int) {
17-
// Use test data with UTF-8 sequences of all lengths, but mainly ASCII.
18-
let string = "Mainly ASCII but also some 2-byte sequences (пример),"
19-
+ " some 3-bytes sequences (성능 테스트) and occasionally a 🐼 (4 bytes)"
20-
let data = Array(string.utf8)
17+
// 1-byte sequences
18+
// This test case is the longest as it's the most performance-sensitive.
19+
let ascii = "Swift is a multi-paradigm, compiled programming language created for iOS, OS X, watchOS, tvOS and Linux development by Apple Inc. Swift is designed to work with Apple's Cocoa and Cocoa Touch frameworks and the large body of existing Objective-C code written for Apple products. Swift is intended to be more resilient to erroneous code (\"safer\") than Objective-C and also more concise. It is built with the LLVM compiler framework included in Xcode 6 and later and uses the Objective-C runtime, which allows C, Objective-C, C++ and Swift code to run within a single program."
20+
// 2-byte sequences
21+
let russian = "Ру́сский язы́к один из восточнославянских языков, национальный язык русского народа."
22+
// 3-byte sequences
23+
let japanese = "日本語(にほんご、にっぽんご)は、主に日本国内や日本人同士の間で使われている言語である。"
24+
// 4-byte sequences
25+
// Most commonly emoji, which are usually mixed with other text.
26+
let emoji = "Panda 🐼, Dog 🐶, Cat 🐱, Mouse 🐭."
27+
28+
let strings = [ ascii, russian, japanese, emoji ].map { Array($0.utf8) }
29+
2130
for _ in 1...N {
22-
var generator = data.generate()
23-
var utf8 = UTF8()
24-
while !utf8.decode(&generator).isEmptyInput() { }
31+
for string in strings {
32+
var generator = string.generate()
33+
var utf8 = UTF8()
34+
while !utf8.decode(&generator).isEmptyInput() { }
35+
}
2536
}
2637
}

0 commit comments

Comments
 (0)