Skip to content

Commit 6ae2fc9

Browse files
committed
[stdlib] Switch to SipHash-1-3 for the standard hash function
Beyond switching hashing algorithms, this also enables per-execution hash seeds, fulfilling a long-standing prophecy in Hashable’s documentation. To reduce the possibility of random test failures, StdlibUnittest’s TestSuite overrides the random hash seed on initialization. rdar://problem/24109692 rdar://problem/35052153
1 parent fc4d4e2 commit 6ae2fc9

File tree

4 files changed

+172
-247
lines changed

4 files changed

+172
-247
lines changed

stdlib/private/StdlibUnittest/StdlibUnittest.swift.gyb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,15 @@ struct PersistentState {
10891089
static var runNoTestsWasCalled: Bool = false
10901090
static var ranSomething: Bool = false
10911091
static var complaintInstalled = false
1092+
static var hashingKeyOverridden = false
1093+
1094+
/// Replaces `_Hashing.secretKey` with the fixed key `(0, 0)` the first time
/// it is called; later calls are no-ops because `hashingKeyOverridden` is set.
/// Per the commit description, this exists so that per-execution random hash
/// seeds do not cause random test failures.
static func overrideHashingKey() {
1095+
if !hashingKeyOverridden {
1096+
// FIXME(hasher): This has to run before creating the first Set/Dictionary
1097+
_Hashing.secretKey = (0, 0)
1098+
hashingKeyOverridden = true
1099+
}
1100+
}
10921101

10931102
static func complainIfNothingRuns() {
10941103
if !complaintInstalled {
@@ -1200,6 +1209,7 @@ func stopTrackingObjects(_: UnsafePointer<CChar>) -> Int
12001209

12011210
public final class TestSuite {
12021211
public init(_ name: String) {
1212+
PersistentState.overrideHashingKey()
12031213
self.name = name
12041214
_precondition(
12051215
_testNameToIndex[name] == nil,

stdlib/public/core/Hashing.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,4 +311,4 @@ internal struct _LegacyHasher {
311311
}
312312
}
313313

314-
internal typealias _Hasher = _LegacyHasher
314+
internal typealias _Hasher = _SipHash13

stdlib/public/core/SipHash.swift.gyb

Lines changed: 94 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -19,63 +19,20 @@
1919
/// * Daniel J. Bernstein <djb@cr.yp.to>
2020
//===----------------------------------------------------------------------===//
2121

22+
%{
23+
# Number of bits in the Builtin.Word type
24+
word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
25+
}%
26+
2227
@_fixed_layout // FIXME(sil-serialize-all)
2328
@_versioned
2429
internal enum _SipHashDetail {
25-
@_inlineable // FIXME(sil-serialize-all)
26-
@_versioned
27-
@inline(__always)
28-
internal static func _rotate(_ x: UInt64, leftBy amount: Int) -> UInt64 {
29-
return (x &<< UInt64(amount)) | (x &>> UInt64(64 - amount))
30-
}
31-
32-
@_inlineable // FIXME(sil-serialize-all)
33-
@_versioned
34-
@inline(__always)
35-
internal static func _loadUnalignedUInt64LE(
36-
from p: UnsafeRawPointer
37-
) -> UInt64 {
38-
// FIXME(integers): split into multiple expressions to speed up the
39-
// typechecking
40-
var result =
41-
UInt64(p.load(fromByteOffset: 0, as: UInt8.self))
42-
result |=
43-
(UInt64(p.load(fromByteOffset: 1, as: UInt8.self)) &<< (8 as UInt64))
44-
result |=
45-
(UInt64(p.load(fromByteOffset: 2, as: UInt8.self)) &<< (16 as UInt64))
46-
result |=
47-
(UInt64(p.load(fromByteOffset: 3, as: UInt8.self)) &<< (24 as UInt64))
48-
result |=
49-
(UInt64(p.load(fromByteOffset: 4, as: UInt8.self)) &<< (32 as UInt64))
50-
result |=
51-
(UInt64(p.load(fromByteOffset: 5, as: UInt8.self)) &<< (40 as UInt64))
52-
result |=
53-
(UInt64(p.load(fromByteOffset: 6, as: UInt8.self)) &<< (48 as UInt64))
54-
result |=
55-
(UInt64(p.load(fromByteOffset: 7, as: UInt8.self)) &<< (56 as UInt64))
56-
return result
57-
}
58-
59-
@_inlineable // FIXME(sil-serialize-all)
6030
@_versioned
6131
@inline(__always)
62-
internal static func _loadPartialUnalignedUInt64LE(
63-
from p: UnsafeRawPointer,
64-
byteCount: Int
65-
) -> UInt64 {
66-
_sanityCheck((0..<8).contains(byteCount))
67-
var result: UInt64 = 0
68-
if byteCount >= 1 { result |= UInt64(p.load(fromByteOffset: 0, as: UInt8.self)) }
69-
if byteCount >= 2 { result |= UInt64(p.load(fromByteOffset: 1, as: UInt8.self)) &<< (8 as UInt64) }
70-
if byteCount >= 3 { result |= UInt64(p.load(fromByteOffset: 2, as: UInt8.self)) &<< (16 as UInt64) }
71-
if byteCount >= 4 { result |= UInt64(p.load(fromByteOffset: 3, as: UInt8.self)) &<< (24 as UInt64) }
72-
if byteCount >= 5 { result |= UInt64(p.load(fromByteOffset: 4, as: UInt8.self)) &<< (32 as UInt64) }
73-
if byteCount >= 6 { result |= UInt64(p.load(fromByteOffset: 5, as: UInt8.self)) &<< (40 as UInt64) }
74-
if byteCount >= 7 { result |= UInt64(p.load(fromByteOffset: 6, as: UInt8.self)) &<< (48 as UInt64) }
75-
return result
32+
/// Returns `x` rotated left by `amount` bits.
///
/// Both shifts are masking shifts, so their shift counts are reduced modulo
/// 64; with `amount == 0` the right shift by 64 becomes a shift by 0 and the
/// result is `x`. The subtraction `64 - amount` is a checked `UInt64`
/// subtraction, so `amount` must not exceed 64.
internal static func _rotate(_ x: UInt64, leftBy amount: UInt64) -> UInt64 {
33+
return (x &<< amount) | (x &>> (64 - amount))
7634
}
7735

78-
@_inlineable // FIXME(sil-serialize-all)
7936
@_versioned
8037
@inline(__always)
8138
internal static func _sipRound(
@@ -102,7 +59,7 @@ internal enum _SipHashDetail {
10259
}
10360

10461
% for (c_rounds, d_rounds) in [(2, 4), (1, 3)]:
105-
% Self = '_SipHash{}{}Context'.format(c_rounds, d_rounds)
62+
% Self = '_SipHash{}{}'.format(c_rounds, d_rounds)
10663

10764
@_fixed_layout // FIXME(sil-serialize-all)
10865
public // @testable
@@ -120,181 +77,130 @@ struct ${Self} {
12077
@_versioned
12178
internal var v3: UInt64 = 0x7465646279746573
12279

80+
/// This value holds the byte count and the pending bytes that haven't been
81+
/// compressed yet, in the format that the finalization step needs. (The least
82+
/// significant 56 bits hold the trailing bytes, while the most significant 8
83+
/// bits hold the count of bytes appended so far, mod 256.)
12384
@_versioned
124-
internal var hashedByteCount: UInt64 = 0
125-
126-
@_versioned
127-
internal var dataTail: UInt64 = 0
128-
129-
@_versioned
130-
internal var dataTailByteCount: Int = 0
85+
internal var tailAndByteCount: UInt64 = 0
13186

132-
@_versioned
133-
internal var finalizedHash: UInt64?
134-
135-
@_inlineable // FIXME(sil-serialize-all)
87+
/// Creates a hasher state keyed with `key`.
///
/// `key.0` is XORed into the initial values of `v0` and `v2`, and `key.1`
/// into the initial values of `v1` and `v3`.
@inline(__always)
13688
public init(key: (UInt64, UInt64)) {
13789
v3 ^= key.1
13890
v2 ^= key.0
13991
v1 ^= key.1
14092
v0 ^= key.0
14193
}
14294

143-
// FIXME(ABI)#62 (UnsafeRawBufferPointer): Use UnsafeRawBufferPointer.
144-
@_inlineable // FIXME(sil-serialize-all)
145-
public // @testable
146-
mutating func append(_ data: UnsafeRawPointer, byteCount: Int) {
147-
_append_alwaysInline(data, byteCount: byteCount)
95+
/// Creates a hasher state keyed with the current value of
/// `_Hashing.secretKey`.
@inline(__always)
96+
public init() {
97+
self.init(key: _Hashing.secretKey)
14898
}
14999

150-
// FIXME(ABI)#63 (UnsafeRawBufferPointer): Use UnsafeRawBufferPointer.
151-
@_inlineable // FIXME(sil-serialize-all)
152100
@_versioned
153-
@inline(__always)
154-
internal mutating func _append_alwaysInline(
155-
_ data: UnsafeRawPointer,
156-
byteCount: Int
157-
) {
158-
precondition(finalizedHash == nil)
159-
_sanityCheck((0..<8).contains(dataTailByteCount))
160-
161-
let dataEnd = data + byteCount
162-
163-
var data = data
164-
var byteCount = byteCount
165-
if dataTailByteCount != 0 {
166-
let restByteCount = min(
167-
MemoryLayout<UInt64>.size - dataTailByteCount,
168-
byteCount)
169-
let rest = _SipHashDetail._loadPartialUnalignedUInt64LE(
170-
from: data,
171-
byteCount: restByteCount)
172-
dataTail |= rest &<< UInt64(dataTailByteCount * 8)
173-
dataTailByteCount += restByteCount
174-
data += restByteCount
175-
byteCount -= restByteCount
176-
}
177-
178-
if dataTailByteCount == MemoryLayout<UInt64>.size {
179-
_appendDirectly(dataTail)
180-
dataTail = 0
181-
dataTailByteCount = 0
182-
} else if dataTailByteCount != 0 {
183-
_sanityCheck(data == dataEnd)
184-
return
185-
}
186-
187-
let endOfWords =
188-
data + byteCount - (byteCount % MemoryLayout<UInt64>.size)
189-
while data != endOfWords {
190-
_appendDirectly(_SipHashDetail._loadUnalignedUInt64LE(from: data))
191-
data += 8
192-
// No need to update `byteCount`, it is not used beyond this point.
101+
/// The most significant 8 bits of `tailAndByteCount`, shifted down to the low
/// byte: the count of bytes appended so far, mod 256.
internal var byteCount: UInt64 {
102+
@inline(__always)
103+
get {
104+
return tailAndByteCount &>> 56
193105
}
106+
}
194107

195-
if data != dataEnd {
196-
dataTailByteCount = dataEnd - data
197-
dataTail = _SipHashDetail._loadPartialUnalignedUInt64LE(
198-
from: data,
199-
byteCount: dataTailByteCount)
108+
/// The least significant 56 bits of `tailAndByteCount` (the count byte is
/// masked off): the pending bytes that have not been compressed yet.
@_versioned
109+
internal var tail: UInt64 {
110+
@inline(__always)
111+
get {
112+
return tailAndByteCount & ~(0xFF &<< 56)
200113
}
201114
}
202115

203-
/// This function mixes in the given word directly into the state,
204-
/// ignoring `dataTail`.
205-
@_inlineable // FIXME(sil-serialize-all)
206-
@_versioned
207116
/// Mixes the 64-bit word `m` into the state: XORs it into `v3`, runs
/// `c_rounds` calls to `_SipHashDetail._sipRound`, then XORs it into `v0`.
///
/// In this diff the removed version (`_appendDirectly`) used a runtime `for`
/// loop and also advanced `hashedByteCount`; the added version (`_compress`)
/// uses a gyb-time `% for`, which emits one `_sipRound` call per round, and
/// does no byte counting itself.
@inline(__always)
208-
internal mutating func _appendDirectly(_ m: UInt64) {
117+
@_versioned
118+
internal mutating func _compress(_ m: UInt64) {
209119
v3 ^= m
210-
for _ in 0..<${c_rounds} {
211-
_SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
212-
}
120+
% for _ in range(0, c_rounds):
121+
_SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
122+
% end
213123
v0 ^= m
214-
hashedByteCount += 8
215124
}
216125

217-
% for data_type in ['UInt', 'Int', 'UInt64', 'Int64', 'UInt32', 'Int32']:
218-
@_inlineable // FIXME(sil-serialize-all)
219-
public // @testable
220-
mutating func append(_ data: ${data_type}) {
221-
var data = data
222-
_append_alwaysInline(&data, byteCount: MemoryLayout.size(ofValue: data))
126+
/// Appends `value` by reinterpreting its bit pattern as a `UInt` and
/// forwarding to the `UInt` overload.
@inline(__always)
127+
public mutating func append(_ value: Int) {
128+
append(UInt(bitPattern: value))
223129
}
224-
% end
225130

226-
@_inlineable // FIXME(sil-serialize-all)
227-
public // @testable
228-
mutating func finalizeAndReturnHash() -> UInt64 {
229-
return _finalizeAndReturnHash_alwaysInline()
131+
/// Appends a word-sized unsigned value.
///
/// The body is selected at gyb time from `word_bits`: 64-bit targets forward
/// to the `UInt64` overload, 32-bit targets to the `UInt32` overload, and any
/// other width generates a `fatalError` call.
@inline(__always)
132+
public mutating func append(_ value: UInt) {
133+
% if word_bits == 64:
134+
append(UInt64(_truncatingBits: value._lowWord))
135+
% elif word_bits == 32:
136+
append(UInt32(_truncatingBits: value._lowWord))
137+
% else:
138+
fatalError("Unsupported word width")
139+
% end
230140
}
231141

232-
@_inlineable // FIXME(sil-serialize-all)
233-
@_versioned
234142
@inline(__always)
235-
internal mutating func _finalizeAndReturnHash_alwaysInline() -> UInt64 {
236-
if let finalizedHash = finalizedHash {
237-
return finalizedHash
238-
}
239-
240-
_sanityCheck((0..<8).contains(dataTailByteCount))
241-
242-
hashedByteCount += UInt64(dataTailByteCount)
243-
let b: UInt64 = (hashedByteCount << 56) | dataTail
244-
245-
v3 ^= b
246-
for _ in 0..<${c_rounds} {
247-
_SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
248-
}
249-
v0 ^= b
250-
251-
v2 ^= 0xff
143+
/// Appends `value` by reinterpreting its bit pattern as a `UInt32` and
/// forwarding to the `UInt32` overload.
public mutating func append(_ value: Int32) {
144+
append(UInt32(bitPattern: value))
145+
}
252146

253-
for _ in 0..<${d_rounds} {
254-
_SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
147+
/// Appends a 32-bit value.
///
/// Bit 2 of `byteCount` tells whether a 4-byte half-word is already pending:
/// - If not, the value is stored in the low bits of `tailAndByteCount` and 4
///   is added to the count field; nothing is compressed yet.
/// - If so, the value (upper 32 bits) is combined with the pending `tail`
///   (lower bits) into one 64-bit word, that word is compressed, and
///   `tailAndByteCount` is rebuilt with an empty tail and the count advanced
///   by 4.
@inline(__always)
148+
public mutating func append(_ value: UInt32) {
149+
let m = UInt64(_truncatingBits: value._lowWord)
150+
if byteCount & 4 == 0 {
151+
_sanityCheck(byteCount & 7 == 0 && tail == 0)
152+
tailAndByteCount = (tailAndByteCount | m) &+ (4 &<< 56)
153+
} else {
154+
_sanityCheck(byteCount & 3 == 0)
155+
_compress((m &<< 32) | tail)
156+
tailAndByteCount = (byteCount &+ 4) &<< 56
255157
}
158+
}
256159

257-
finalizedHash = v0 ^ v1 ^ v2 ^ v3
258-
return finalizedHash!
160+
/// Appends `value` by reinterpreting its bit pattern as a `UInt64` and
/// forwarding to the `UInt64` overload.
@inline(__always)
161+
public mutating func append(_ value: Int64) {
162+
append(UInt64(bitPattern: value))
259163
}
260164

261-
@_inlineable // FIXME(sil-serialize-all)
262-
@_versioned // FIXME(sil-serialize-all)
263-
internal mutating func _finalizeAndReturnIntHash() -> Int {
264-
let hash: UInt64 = finalizeAndReturnHash()
265-
#if arch(i386) || arch(arm)
266-
return Int(truncatingIfNeeded: hash)
267-
#elseif arch(x86_64) || arch(arm64) || arch(powerpc64) || arch(powerpc64le) || arch(s390x)
268-
return Int(Int64(bitPattern: hash))
269-
#endif
165+
/// Appends a 64-bit value.
///
/// Bit 2 of `byteCount` tells whether a 4-byte half-word is already pending:
/// - If not, `m` is compressed directly and 8 is added to the count field.
/// - If so, the low 32 bits of `m` are combined with the pending `tail` into
///   one word and compressed; the high 32 bits of `m` become the new tail,
///   and the count is advanced by 8.
@inline(__always)
166+
public mutating func append(_ m: UInt64) {
167+
if byteCount & 4 == 0 {
168+
_sanityCheck(byteCount & 7 == 0 && tail == 0)
169+
_compress(m)
170+
tailAndByteCount = tailAndByteCount &+ (8 &<< 56)
171+
} else {
172+
_sanityCheck(byteCount & 3 == 0)
173+
_compress((m &<< 32) | tail)
174+
tailAndByteCount = ((byteCount &+ 8) &<< 56) | (m &>> 32)
175+
}
270176
}
271177

272-
// FIXME(ABI)#64 (UnsafeRawBufferPointer): Use UnsafeRawBufferPointer.
273-
@_inlineable // FIXME(sil-serialize-all)
274-
public // @testable
275-
static func hash(
276-
data: UnsafeRawPointer,
277-
dataByteCount: Int,
278-
key: (UInt64, UInt64)
178+
/// Folds up to 7 trailing bytes into the state and returns the final hash.
///
/// (The lines marked `-` in this span belong to the removed static
/// `hash(data:dataByteCount:key:)` wrapper; the lines marked `+` are the new
/// method described here.)
///
/// - Parameters:
///   - tailBytes: The trailing bytes packed into the low bits of a `UInt64`;
///     the sanity checks require all bits above `tailByteCount * 8` to be
///     zero.
///   - tailByteCount: Number of trailing bytes; the sanity checks require it
///     to be in `0..<8` and to fit, together with the already pending bytes,
///     in 7 bytes.
/// - Returns: The result of the parameterless `finalize()` after `tailBytes`
///   has been placed above the pending bytes in `tailAndByteCount` and the
///   count field advanced by `tailByteCount`.
///
/// NOTE(review): the second sanity check adds `tailByteCount` (an `Int`) to a
/// `UInt64` expression — confirm that this type-checks as intended.
@inline(__always)
179+
public mutating func finalize(
180+
tailBytes: UInt64,
181+
tailByteCount: Int
279182
) -> UInt64 {
280-
return ${Self}._hash_alwaysInline(
281-
data: data,
282-
dataByteCount: dataByteCount,
283-
key: key)
183+
_sanityCheck(tailByteCount >= 0 && tailByteCount < 8)
184+
_sanityCheck(tailByteCount + (byteCount & 7) <= 7)
185+
_sanityCheck(tailBytes >> (tailByteCount << 3) == 0)
186+
let count = UInt64(_truncatingBits: tailByteCount._lowWord)
187+
let currentByteCount = byteCount & 7
188+
tailAndByteCount |= (tailBytes &<< (currentByteCount &<< 3))
189+
tailAndByteCount = tailAndByteCount &+ (count &<< 56)
190+
return finalize()
284191
}
285192

286-
// FIXME(ABI)#65 (UnsafeRawBufferPointer): Use UnsafeRawBufferPointer.
287-
@_inlineable // FIXME(sil-serialize-all)
288193
@inline(__always)
289-
public // @testable
290-
static func _hash_alwaysInline(
291-
data: UnsafeRawPointer,
292-
dataByteCount: Int,
293-
key: (UInt64, UInt64)
294-
) -> UInt64 {
295-
var context = ${Self}(key: key)
296-
context._append_alwaysInline(data, byteCount: dataByteCount)
297-
return context._finalizeAndReturnHash_alwaysInline()
194+
/// Finishes hashing and returns the 64-bit result.
///
/// Compresses `tailAndByteCount` as the last message word, XORs `0xff` into
/// `v2`, runs `d_rounds` calls to `_SipHashDetail._sipRound`, and returns the
/// XOR of the four state words. The result is not cached: the method mutates
/// the state, so calling it again mixes the state further.
public mutating func finalize() -> UInt64 {
195+
_compress(tailAndByteCount)
196+
197+
v2 ^= 0xff
198+
199+
for _ in 0..<${d_rounds} {
200+
_SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
201+
}
202+
203+
return (v0 ^ v1 ^ v2 ^ v3)
298204
}
299205
}
300206
% end

0 commit comments

Comments
 (0)