Skip to content

[stdlib] String normalization functions #21026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Jan 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7078823
fast/foreignNormalize functions
Dec 1, 2018
2483f49
fix linux tests
Dec 5, 2018
44ca069
removed unimplemented() function, cleaned up the transcodeToUTF8 func…
Dec 5, 2018
5e90476
renamed some things in the normalization enums
Dec 5, 2018
a52f8fc
use constants instead of ✨magic✨ numbers
Dec 5, 2018
7c1d396
adopt the new normalize functions for String hashing
Dec 5, 2018
34a4d88
Michael's feedback
Dec 5, 2018
76b79ef
fixed an indexing bug, closure->bool
Dec 6, 2018
ceaf53e
Michael's feedback + bug fix
Dec 6, 2018
f95ebe0
renamed some things, commented some other things
Dec 6, 2018
6c7624b
added a test to catch the bug I fixed
Dec 6, 2018
f0c068f
fix internalInvariant check logic. Fix typo in test
Dec 12, 2018
7bb3e9e
add opaque test for substrings
Dec 12, 2018
e37fa97
Remove some unneeded @inlinable attributes
Dec 12, 2018
8dd095b
add isOnUnicodeScalarBoundary method for UnsafeBufferPointer<UInt8>
Dec 12, 2018
1e9c624
non-opaque String tests pass
Dec 14, 2018
93dedae
opaque string comparison
Dec 14, 2018
310e177
More feedback
Dec 14, 2018
7bd432c
Remove redundant isNFCStarter function
Dec 17, 2018
09037ca
rename some things, rework fastFill to return an optional
Dec 17, 2018
bbd5416
moar renaming
Dec 17, 2018
e0976f0
Killed off _foreignNormalizedCompareImpl and _fastNormalizedCompareIm…
Dec 18, 2018
c9071fe
Remove recursive allocation functions, just continue after allocating…
Dec 18, 2018
aef3429
Kill off the normalized iterator
Dec 19, 2018
74c51ba
Style cleanups, performWithAllocationIfNecessary refactor
Dec 19, 2018
246c040
add comment for @inline(__always)
Dec 19, 2018
6119ebd
add labels to the normalization tuple returns
Dec 19, 2018
49ee5f0
Bad rebase
Dec 19, 2018
e7ac5b7
re-add some benchmarks
Dec 19, 2018
1b9aec9
Some cleanup, comments and code de-duplication
Dec 20, 2018
280e002
fix regression for UnicodeScalarView iteration
Jan 3, 2019
922437e
Make hashing faster
Jan 3, 2019
a47bcb8
Fix character iteration benchmarks
Jan 4, 2019
2e3eb1f
Remove some @inline(__always)
Jan 4, 2019
cef0fb8
restore the original benchmark name (for now)
Jan 5, 2019
36989a1
fix up the gyb file
Jan 7, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 18 additions & 17 deletions benchmark/single-source/StringComparison.swift
Original file line number Diff line number Diff line change
Expand Up @@ -150,58 +150,58 @@ public let StringHashing: [BenchmarkInfo] = [
),
]

public let NormalizedIterator: [BenchmarkInfo] = [
public let StringNormalization: [BenchmarkInfo] = [
BenchmarkInfo(
name: "NormalizedIterator_ascii",
runFunction: run_NormalizedIterator_ascii,
runFunction: run_StringNormalization_ascii,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_ascii) }
),
BenchmarkInfo(
name: "NormalizedIterator_latin1",
runFunction: run_NormalizedIterator_latin1,
runFunction: run_StringNormalization_latin1,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_latin1) },
legacyFactor: 2
),
BenchmarkInfo(
name: "NormalizedIterator_fastPrenormal",
runFunction: run_NormalizedIterator_fastPrenormal,
runFunction: run_StringNormalization_fastPrenormal,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_fastPrenormal) },
legacyFactor: 10
),
BenchmarkInfo(
name: "NormalizedIterator_slowerPrenormal",
runFunction: run_NormalizedIterator_slowerPrenormal,
runFunction: run_StringNormalization_slowerPrenormal,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_slowerPrenormal) },
legacyFactor: 10
),
BenchmarkInfo(
name: "NormalizedIterator_nonBMPSlowestPrenormal",
runFunction: run_NormalizedIterator_nonBMPSlowestPrenormal,
runFunction: run_StringNormalization_nonBMPSlowestPrenormal,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_nonBMPSlowestPrenormal) },
legacyFactor: 10
),
BenchmarkInfo(
name: "NormalizedIterator_emoji",
runFunction: run_NormalizedIterator_emoji,
runFunction: run_StringNormalization_emoji,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_emoji) },
legacyFactor: 4
),
BenchmarkInfo(
name: "NormalizedIterator_abnormal",
runFunction: run_NormalizedIterator_abnormal,
runFunction: run_StringNormalization_abnormal,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_abnormal) },
legacyFactor: 20
),
BenchmarkInfo(
name: "NormalizedIterator_zalgo",
runFunction: run_NormalizedIterator_zalgo,
runFunction: run_StringNormalization_zalgo,
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_zalgo) },
legacyFactor: 25
Expand Down Expand Up @@ -452,7 +452,7 @@ public func run_StringHashing_zalgo(_ N: Int) {


@inline(never)
public func run_NormalizedIterator_ascii(_ N: Int) {
public func run_StringNormalization_ascii(_ N: Int) {
let workload: Workload = Workload.ascii
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -466,7 +466,7 @@ public func run_NormalizedIterator_ascii(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_latin1(_ N: Int) {
public func run_StringNormalization_latin1(_ N: Int) {
let workload: Workload = Workload.latin1
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -480,7 +480,7 @@ public func run_NormalizedIterator_latin1(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_fastPrenormal(_ N: Int) {
public func run_StringNormalization_fastPrenormal(_ N: Int) {
let workload: Workload = Workload.fastPrenormal
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -494,7 +494,7 @@ public func run_NormalizedIterator_fastPrenormal(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_slowerPrenormal(_ N: Int) {
public func run_StringNormalization_slowerPrenormal(_ N: Int) {
let workload: Workload = Workload.slowerPrenormal
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -508,7 +508,7 @@ public func run_NormalizedIterator_slowerPrenormal(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_nonBMPSlowestPrenormal(_ N: Int) {
public func run_StringNormalization_nonBMPSlowestPrenormal(_ N: Int) {
let workload: Workload = Workload.nonBMPSlowestPrenormal
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -522,7 +522,7 @@ public func run_NormalizedIterator_nonBMPSlowestPrenormal(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_emoji(_ N: Int) {
public func run_StringNormalization_emoji(_ N: Int) {
let workload: Workload = Workload.emoji
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -536,7 +536,7 @@ public func run_NormalizedIterator_emoji(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_abnormal(_ N: Int) {
public func run_StringNormalization_abnormal(_ N: Int) {
let workload: Workload = Workload.abnormal
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -550,7 +550,7 @@ public func run_NormalizedIterator_abnormal(_ N: Int) {
}

@inline(never)
public func run_NormalizedIterator_zalgo(_ N: Int) {
public func run_StringNormalization_zalgo(_ N: Int) {
let workload: Workload = Workload.zalgo
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -564,6 +564,7 @@ public func run_NormalizedIterator_zalgo(_ N: Int) {
}



struct Workload {
static let N = 100

Expand Down
17 changes: 9 additions & 8 deletions benchmark/single-source/StringComparison.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ AllWorkloads = ["ascii", "latin1", "fastPrenormal", "slowerPrenormal",
"longSharedPrefix"]
ComparisonWorkloads = AllWorkloads
HashingWorkloads = AllWorkloads[:-1]
NormalizedIteratorWorkloads = AllWorkloads[:-1]
NormalizationWorkloads = AllWorkloads[:-1]

LegacyFactor = dict(abnormal=20, emoji=4, latin1=2, fastPrenormal=10,
slowerPrenormal=10, nonBMPSlowestPrenormal=10, zalgo=25)
Expand Down Expand Up @@ -68,15 +68,15 @@ public let StringHashing: [BenchmarkInfo] = [
% end # HashingWorkloads
]

public let NormalizedIterator: [BenchmarkInfo] = [
% for Name in NormalizedIteratorWorkloads:
public let StringNormalization: [BenchmarkInfo] = [
% for Name in NormalizationWorkloads:
BenchmarkInfo(
name: "NormalizedIterator_${Name}",
runFunction: run_NormalizedIterator_${Name},
runFunction: run_StringNormalization_${Name},
tags: [.validation, .String],
setUpFunction: { blackHole(Workload_${Name}) }${legacyFactor(Name)}
),
% end # NormalizedIteratorWorkloads
% end # NormalizationWorkloads
]

% for Name in AllWorkloads:
Expand Down Expand Up @@ -116,9 +116,9 @@ public func run_StringHashing_${Name}(_ N: Int) {

% end # HashingWorkloads

%for Name in NormalizedIteratorWorkloads:
%for Name in NormalizationWorkloads:
@inline(never)
public func run_NormalizedIterator_${Name}(_ N: Int) {
public func run_StringNormalization_${Name}(_ N: Int) {
let workload: Workload = Workload.${Name}
let tripCount = workload.tripCount
let payload = workload.payload
Expand All @@ -131,7 +131,8 @@ public func run_NormalizedIterator_${Name}(_ N: Int) {
}
}

% end # NormalizedIteratorWorkloads
% end # NormalizationWorkloads


struct Workload {
static let N = 100
Expand Down
2 changes: 1 addition & 1 deletion benchmark/utils/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ registerBenchmark(NSErrorTest)
registerBenchmark(NSStringConversion)
registerBenchmark(NibbleSort)
registerBenchmark(NopDeinit)
registerBenchmark(NormalizedIterator)
registerBenchmark(ObjectAllocation)
#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS)
registerBenchmark(ObjectiveCBridging)
Expand Down Expand Up @@ -332,6 +331,7 @@ registerBenchmark(StringInterpolation)
registerBenchmark(StringInterpolationSmall)
registerBenchmark(StringInterpolationManySmallSegments)
registerBenchmark(StringMatch)
registerBenchmark(StringNormalization)
registerBenchmark(StringRemoveDupes)
registerBenchmark(StringTests)
registerBenchmark(StringWalk)
Expand Down
1 change: 0 additions & 1 deletion stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ set(SWIFTLIB_ESSENTIAL
NativeDictionary.swift
NativeSet.swift
NewtypeWrapper.swift
NormalizedCodeUnitIterator.swift
ObjectIdentifier.swift
Optional.swift
OptionSet.swift
Expand Down
Loading