Skip to content

Commit a155b75

Browse files
authored
Merge pull request #16029 from lorentey/fast-ascii-hashing
[stdlib] Optimize ASCII String hashing
2 parents: 135425c + ebf290a · commit a155b75

File tree

3 files changed: 49 additions and 97 deletions

3 files changed

+49
-97
lines changed

stdlib/public/core/Hasher.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ internal struct _BufferingHasher<Core: _HasherCore> {
250250
public struct _Hasher {
251251
internal typealias Core = _BufferingHasher<_SipHash13Core>
252252

253-
private var _core: Core
253+
internal var _core: Core
254254

255255
@effects(releasenone)
256256
public init() {

stdlib/public/core/StringHashable.swift

Lines changed: 40 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -12,168 +12,112 @@
1212

1313
import SwiftShims
1414

15-
func _emptyASCIIHashBuffer() -> _UIntBuffer<UInt64, UInt8> {
16-
var buffer = _UIntBuffer<UInt64, UInt8>()
17-
// We don't want the unused bits of a partially filled buffer to collide
18-
// with trailing nuls when hashing
19-
buffer._storage = UInt64.max
20-
return buffer
21-
}
22-
23-
internal struct ASCIIHasher {
24-
private var buffer = _emptyASCIIHashBuffer()
25-
26-
internal mutating func consume() -> UInt64? {
27-
if !buffer.isEmpty {
28-
defer { resetBuffer() }
29-
return buffer._storage
30-
}
31-
return nil
32-
}
33-
34-
private mutating func resetBuffer() {
35-
buffer = _emptyASCIIHashBuffer()
36-
}
37-
38-
internal mutating func append(_ c: UInt8) -> UInt64? {
39-
if buffer.count < buffer.capacity {
40-
buffer.append(c)
41-
}
42-
43-
if buffer.count == buffer.capacity {
44-
defer { resetBuffer() }
45-
return buffer._storage
46-
}
47-
return nil
48-
}
49-
}
50-
5115
extension _UnmanagedString where CodeUnit == UInt8 {
52-
// NOT @usableFromInline
53-
@effects(releasenone)
54-
internal func hashASCII(into hasher: inout _Hasher) {
55-
var asciiHasher = ASCIIHasher()
56-
for c in self {
57-
if let chunk = asciiHasher.append(UInt8(truncatingIfNeeded: c)) {
58-
hasher.combine(chunk)
59-
}
60-
}
61-
62-
if let chunk = asciiHasher.consume() {
63-
hasher.combine(chunk)
64-
}
16+
internal func hashASCII(into core: inout _Hasher.Core) {
17+
core.combine(bytes: rawBuffer)
6518
}
6619
}
6720

6821
extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
69-
// NOT @usableFromInline
70-
internal func hashUTF16(into hasher: inout _Hasher) {
71-
var asciiHasher = ASCIIHasher()
72-
22+
internal func hashUTF16(into core: inout _Hasher.Core) {
7323
for i in self.indices {
7424
let cu = self[i]
7525
let cuIsASCII = cu <= 0x7F
7626
let isSingleSegmentScalar = self.hasNormalizationBoundary(after: i)
7727

78-
guard cuIsASCII && isSingleSegmentScalar else {
79-
if let chunk = asciiHasher.consume() {
80-
hasher.combine(chunk)
81-
}
82-
83-
let codeUnitSequence = IteratorSequence(
84-
_NormalizedCodeUnitIterator(self[i..<endIndex])
85-
)
86-
for element in codeUnitSequence {
87-
hasher.combine(UInt(element))
28+
if cuIsASCII && isSingleSegmentScalar {
29+
core.combine(UInt8(truncatingIfNeeded: cu))
30+
} else {
31+
for encodedScalar in Unicode._ParsingIterator(
32+
codeUnits: _NormalizedCodeUnitIterator(self[i..<endIndex]),
33+
parser: Unicode.UTF16.ForwardParser()
34+
) {
35+
let transcoded = Unicode.UTF8.transcode(
36+
encodedScalar, from: Unicode.UTF16.self
37+
).unsafelyUnwrapped // never fails
38+
let (bytes, count) = transcoded._bytes
39+
core.combine(bytes: bytes, count: count)
8840
}
8941
return
9042
}
91-
92-
if let chunk = asciiHasher.append(UInt8(truncatingIfNeeded: cu)) {
93-
hasher.combine(chunk)
94-
}
95-
}
96-
97-
if let chunk = asciiHasher.consume() {
98-
hasher.combine(chunk)
9943
}
10044
}
10145
}
10246

10347
extension _UnmanagedString where CodeUnit == UInt8 {
104-
@effects(releasenone)
105-
@usableFromInline
106-
internal func computeHashValue(into hasher: inout _Hasher) {
107-
self.hashASCII(into: &hasher)
48+
internal func hash(into hasher: inout _Hasher) {
49+
self.hashASCII(into: &hasher._core)
50+
hasher._core.combine(0xFF as UInt8) // terminator
10851
}
10952
}
11053

11154
extension _UnmanagedString where CodeUnit == UInt16 {
112-
@effects(releasenone)
113-
@usableFromInline
114-
internal func computeHashValue(into hasher: inout _Hasher) {
115-
self.hashUTF16(into: &hasher)
55+
internal func hash(into hasher: inout _Hasher) {
56+
self.hashUTF16(into: &hasher._core)
57+
hasher._core.combine(0xFF as UInt8) // terminator
11658
}
11759
}
11860

11961
extension _UnmanagedOpaqueString {
120-
@usableFromInline
121-
internal func computeHashValue(into hasher: inout _Hasher) {
122-
self.hashUTF16(into: &hasher)
62+
internal func hash(into hasher: inout _Hasher) {
63+
self.hashUTF16(into: &hasher._core)
64+
hasher._core.combine(0xFF as UInt8) // terminator
12365
}
12466
}
12567

12668
extension _SmallUTF8String {
127-
@inlinable
128-
internal func computeHashValue(into hasher: inout _Hasher) {
69+
internal func hash(into hasher: inout _Hasher) {
12970
#if arch(i386) || arch(arm)
13071
unsupportedOn32bit()
13172
#else
13273
if isASCII {
133-
return self.withUnmanagedASCII { $0.computeHashValue(into: &hasher) }
74+
self.withUnmanagedASCII { $0.hash(into: &hasher) }
75+
return
13476
}
135-
return self.withUnmanagedUTF16 { $0.computeHashValue(into: &hasher) }
77+
self.withUnmanagedUTF16 { $0.hash(into: &hasher) }
13678
#endif // 64-bit
13779
}
13880
}
13981

14082
extension _StringGuts {
83+
@effects(releasenone) // FIXME: Is this valid in the opaque case?
14184
@usableFromInline
142-
@effects(releasenone) // FIXME: Is this guaranteed in the opaque case?
14385
internal func _hash(into hasher: inout _Hasher) {
14486
if _isSmall {
145-
return _smallUTF8String.computeHashValue(into: &hasher)
87+
_smallUTF8String.hash(into: &hasher)
88+
return
14689
}
14790

14891
defer { _fixLifetime(self) }
14992
if _slowPath(_isOpaque) {
150-
_asOpaque().computeHashValue(into: &hasher)
93+
_asOpaque().hash(into: &hasher)
15194
return
15295
}
15396
if isASCII {
154-
_unmanagedASCIIView.computeHashValue(into: &hasher)
97+
_unmanagedASCIIView.hash(into: &hasher)
15598
return
15699
}
157-
_unmanagedUTF16View.computeHashValue(into: &hasher)
100+
_unmanagedUTF16View.hash(into: &hasher)
158101
}
159102

103+
@effects(releasenone) // FIXME: Is this valid in the opaque case?
160104
@usableFromInline
161-
@effects(releasenone) // FIXME: Is this guaranteed in the opaque case?
162105
internal func _hash(_ range: Range<Int>, into hasher: inout _Hasher) {
163106
if _isSmall {
164-
return _smallUTF8String[range].computeHashValue(into: &hasher)
107+
_smallUTF8String[range].hash(into: &hasher)
108+
return
165109
}
166110

167111
defer { _fixLifetime(self) }
168112
if _slowPath(_isOpaque) {
169-
_asOpaque()[range].computeHashValue(into: &hasher)
113+
_asOpaque()[range].hash(into: &hasher)
170114
return
171115
}
172116
if isASCII {
173-
_unmanagedASCIIView[range].computeHashValue(into: &hasher)
117+
_unmanagedASCIIView[range].hash(into: &hasher)
174118
return
175119
}
176-
_unmanagedUTF16View[range].computeHashValue(into: &hasher)
120+
_unmanagedUTF16View[range].hash(into: &hasher)
177121
}
178122
}
179123

stdlib/public/core/ValidUTF8Buffer.swift

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,4 +217,12 @@ extension _ValidUTF8Buffer {
217217
public static var encodedReplacementCharacter : _ValidUTF8Buffer {
218218
return _ValidUTF8Buffer(_biasedBits: 0xBD_BF_EF &+ 0x01_01_01)
219219
}
220+
221+
@inlinable
222+
internal var _bytes: (bytes: UInt64, count: Int) {
223+
let count = self.count
224+
let mask: UInt64 = 1 &<< (UInt64(truncatingIfNeeded: count) &<< 3) &- 1
225+
let unbiased = UInt64(truncatingIfNeeded: _biasedBits) &- 0x0101010101010101
226+
return (unbiased & mask, count)
227+
}
220228
}

0 commit comments

Comments
 (0)