Skip to content

Commit 7e66a38

Browse files
committed
[stdlib] De-gyb SipHash; implement full Hasher API
1 parent ccdc218 commit 7e66a38

File tree

11 files changed

+635
-455
lines changed

11 files changed

+635
-455
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ set(SWIFTLIB_ESSENTIAL
6666
AnyHashable.swift
6767
# END WORKAROUND
6868
HashedCollectionsAnyHashableExtensions.swift
69+
Hasher.swift
6970
Hashing.swift
7071
HeapBuffer.swift
7172
ICU.swift
@@ -105,7 +106,7 @@ set(SWIFTLIB_ESSENTIAL
105106
Reverse.swift
106107
Runtime.swift.gyb
107108
RuntimeFunctionCounters.swift
108-
SipHash.swift.gyb
109+
SipHash.swift
109110
SentinelCollection.swift
110111
Sequence.swift
111112
SequenceAlgorithms.swift

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@
165165
"Interval.swift",
166166
"Hashing.swift",
167167
"SipHash.swift",
168+
"Hasher.swift",
168169
"ErrorType.swift",
169170
"InputStream.swift",
170171
"LifetimeManager.swift",

stdlib/public/core/Hashable.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,15 @@ public protocol Hashable : Equatable {
114114
}
115115

116116
extension Hashable {
  /// Feeds this value into `hasher` by combining its `hashValue`.
  ///
  /// - Parameter hasher: The hasher that receives the value's `hashValue`.
  @inlinable
  @inline(__always)
  public func _hash(into hasher: inout _Hasher) {
    let fallbackHash = hashValue
    hasher.combine(fallbackHash)
  }
}
122123

123124
// Called by synthesized `hashValue` implementations.
125+
@inlinable
124126
@inline(__always)
125127
public func _hashValue<H: Hashable>(for value: H) -> Int {
126128
var hasher = _Hasher()

stdlib/public/core/Hasher.swift

Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// Defines the Hasher struct, representing Swift's standard hash function.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
import SwiftShims
18+
19+
/// Requirements for a hash function core that consumes its input as a
/// sequence of 64-bit words.
internal protocol _HasherCore {
  /// Creates a core whose initial state is derived from the two-word `seed`.
  init(seed: (UInt64, UInt64))

  /// Mixes one complete 64-bit word of input into the state.
  mutating func compress(_ value: UInt64)

  /// Consumes the final word of input and returns the resulting hash.
  ///
  /// - Parameter tailAndByteCount: The trailing bytes and the byte count,
  ///   packed into a single word.
  /// - Returns: The 64-bit hash value.
  mutating func finalize(tailAndByteCount: UInt64) -> UInt64
}
24+
25+
/// Packs up to seven bytes starting at `p` into the low-order bytes of a
/// `UInt64`, with the byte at the lowest address as the least significant.
///
/// Bytes are loaded one at a time, so `p` needs no particular alignment.
///
/// - Parameters:
///   - p: Address of the first byte to read.
///   - byteCount: Number of bytes to read; must be in `0...7`. Any other
///     value is reported through `_sanityCheckFailure()`.
/// - Returns: The loaded bytes in little-endian order; bytes beyond
///   `byteCount` are zero.
@inline(__always)
internal func _loadPartialUnalignedUInt64LE(
  _ p: UnsafeRawPointer,
  byteCount: Int
) -> UInt64 {
  guard byteCount >= 0 && byteCount < 8 else {
    _sanityCheckFailure()
  }
  var word: UInt64 = 0
  for offset in 0 ..< byteCount {
    let byte = UInt64(p.load(fromByteOffset: offset, as: UInt8.self))
    // Byte `offset` occupies bits `8 * offset ..< 8 * (offset + 1)`.
    word |= byte &<< UInt64(truncatingIfNeeded: offset &<< 3)
  }
  return word
}
59+
60+
/// A one-word staging buffer that cuts arbitrary input into 8-byte chunks.
///
/// The storage is a single 64-bit value laid out the way the SipHash
/// finalization step expects it: the low 56 bits carry the bytes that have
/// not yet formed a full chunk, and the high 8 bits carry the number of bytes
/// appended so far, modulo 256. The lower three bits of that byte count give
/// the number of bytes currently held in the buffer.
internal struct _HasherTailBuffer {
  // msb                                                             lsb
  // +---------+-------+-------+-------+-------+-------+-------+-------+
  // |byteCount|                 tail (<= 56 bits)                     |
  // +---------+-------+-------+-------+-------+-------+-------+-------+
  internal var value: UInt64

  /// Creates an empty buffer: no pending bytes and a byte count of zero.
  @inline(__always)
  internal init() {
    value = 0
  }

  /// Creates a buffer from an explicit tail and byte count.
  ///
  /// - Parameters:
  ///   - tail: The pending bytes; it must not have bits set beyond the
  ///     `byteCount & 7` bytes that are supposed to be buffered.
  ///   - byteCount: Any value; only its lower 8 bits are kept, and its lower
  ///     three bits give the number of bytes stored in the buffer.
  @inline(__always)
  internal init(tail: UInt64, byteCount: UInt64) {
    _sanityCheck((tail & ~((1 << ((byteCount & 7) << 3)) - 1)) == 0)
    value = (byteCount &<< 56) | tail
  }

  /// The pending bytes, i.e. the low 56 bits of `value`.
  internal var tail: UInt64 {
    @inline(__always)
    get { return value & 0x00FF_FFFF_FFFF_FFFF }
  }

  /// The count of bytes appended so far (modulo 256), i.e. the top byte of
  /// `value`.
  internal var byteCount: UInt64 {
    @inline(__always)
    get { return value &>> 56 }
  }

  /// Whether `finalize()` has been called on this buffer.
  internal var isFinalized: Bool {
    @inline(__always)
    get { return value == 1 }
  }

  /// Marks the buffer as finalized by storing a sentinel value.
  @inline(__always)
  internal mutating func finalize() {
    // The sentinel is a byteCount of 0 combined with a nonzero tail, a
    // state that never occurs during normal use.
    value = 1
  }

  /// Appends a full 8-byte word and returns the chunk that is now complete.
  ///
  /// - Parameter bytes: The eight bytes to append.
  /// - Returns: The next complete 8-byte chunk.
  @inline(__always)
  internal mutating func append(_ bytes: UInt64) -> UInt64 {
    let buffered = byteCount & 7
    guard buffered != 0 else {
      // Nothing is pending, so the word passes straight through; only the
      // byte count advances.
      value = value &+ (8 &<< 56)
      return bytes
    }
    let shift = buffered &<< 3
    // The pending bytes fill the low end of the chunk and the new word's low
    // bytes fill the rest.
    let chunk = tail | (bytes &<< shift)
    let updatedCount = byteCount &+ 8
    // The new word's remaining high bytes become the new tail.
    let carry = bytes &>> (64 - shift)
    value = (updatedCount &<< 56) | carry
    return chunk
  }

  /// Appends `count` bytes and returns a chunk if one became complete.
  ///
  /// - Parameters:
  ///   - bytes: The bytes to append, held in the low-order `count` bytes; all
  ///     higher bits must be zero.
  ///   - count: The number of bytes to append; must be less than 8.
  /// - Returns: The completed 8-byte chunk, or `nil` if the buffer still
  ///   holds fewer than eight bytes.
  @inline(__always)
  internal
  mutating func append(_ bytes: UInt64, count: UInt64) -> UInt64? {
    _sanityCheck(count >= 0 && count < 8)
    _sanityCheck(bytes & ~((1 &<< (count &<< 3)) &- 1) == 0)
    let buffered = byteCount & 7
    let shift = buffered &<< 3
    let total = buffered + count
    if total < 8 {
      // Still short of a full chunk: merge the bytes above the pending ones
      // and advance the byte count.
      value = (value | (bytes &<< shift)) &+ (count &<< 56)
      return nil
    }
    let chunk = tail | (bytes &<< shift)
    // Start over with an empty tail and the advanced byte count.
    value = (byteCount &+ count) &<< 56
    if total > 8 {
      // Bytes that did not fit into the chunk become the new tail.
      value |= bytes &>> (64 - shift)
    }
    return chunk
  }
}
139+
140+
/// Feeds values of arbitrary byte width to a word-oriented `_HasherCore`.
///
/// Input is staged in a `_HasherTailBuffer`; each time eight bytes have
/// accumulated, the completed chunk is passed to the core's `compress(_:)`.
/// Once `finalize()` has been called, any further `combine` or `finalize`
/// call fails a precondition.
internal struct _BufferingHasher<Core: _HasherCore> {
  private var _buffer: _HasherTailBuffer
  private var _core: Core

  /// Creates a hasher with an empty buffer and a core seeded with `seed`.
  @inline(__always)
  internal init(seed: (UInt64, UInt64)) {
    self._buffer = _HasherTailBuffer()
    self._core = Core(seed: seed)
  }

  /// Combines a word-sized value, as 32 bits on i386/arm and as 64 bits
  /// elsewhere.
  @inline(__always)
  internal mutating func combine(_ value: UInt) {
#if arch(i386) || arch(arm)
    combine(UInt32(truncatingIfNeeded: value))
#else
    combine(UInt64(truncatingIfNeeded: value))
#endif
  }

  /// Combines eight bytes. This always completes a chunk, which is
  /// compressed immediately.
  @inline(__always)
  internal mutating func combine(_ value: UInt64) {
    precondition(!_buffer.isFinalized)
    _core.compress(_buffer.append(value))
  }

  /// Combines four bytes, compressing a chunk if one became complete.
  @inline(__always)
  internal mutating func combine(_ value: UInt32) {
    precondition(!_buffer.isFinalized)
    let value = UInt64(truncatingIfNeeded: value)
    if let chunk = _buffer.append(value, count: 4) {
      _core.compress(chunk)
    }
  }

  /// Combines two bytes, compressing a chunk if one became complete.
  @inline(__always)
  internal mutating func combine(_ value: UInt16) {
    precondition(!_buffer.isFinalized)
    let value = UInt64(truncatingIfNeeded: value)
    if let chunk = _buffer.append(value, count: 2) {
      _core.compress(chunk)
    }
  }

  /// Combines one byte, compressing a chunk if one became complete.
  @inline(__always)
  internal mutating func combine(_ value: UInt8) {
    precondition(!_buffer.isFinalized)
    let value = UInt64(truncatingIfNeeded: value)
    if let chunk = _buffer.append(value, count: 1) {
      _core.compress(chunk)
    }
  }

  /// Combines the low-order `count` bytes of `bytes`.
  ///
  /// - Parameters:
  ///   - bytes: The bytes to combine, held in the low-order `count` bytes;
  ///     the remaining bits must be zero.
  ///   - count: The number of bytes to combine; must be in `0...7`. Use
  ///     `combine(_: UInt64)` to combine a full eight-byte word.
  @inline(__always)
  internal mutating func combine(bytes: UInt64, count: Int) {
    precondition(!_buffer.isFinalized)
    // Use the same bounds as `_HasherTailBuffer.append(_:count:)`, which
    // requires `count < 8`. Checking before the conversion to `UInt64` also
    // catches a negative count, which would otherwise turn into a huge
    // unsigned value.
    _sanityCheck(count >= 0 && count < 8)
    let count = UInt64(truncatingIfNeeded: count)
    if let chunk = _buffer.append(bytes, count: count) {
      _core.compress(chunk)
    }
  }

  /// Combines every byte of `bytes`, in order.
  ///
  /// The input is processed in three phases: a partial word up to the first
  /// `UInt64`-aligned address, then as many aligned 8-byte words as are
  /// available, then a trailing partial word.
  @inline(__always)
  internal mutating func combine(bytes: UnsafeRawBufferPointer) {
    precondition(!_buffer.isFinalized)
    var remaining = bytes.count
    guard remaining > 0 else { return }
    // The buffer was just checked to be non-empty, so the force unwrap
    // relies on it having a base address.
    var data = bytes.baseAddress!

    // Load first unaligned partial word of data
    do {
      let start = UInt(bitPattern: data)
      let end = _roundUp(start, toAlignment: MemoryLayout<UInt64>.alignment)
      let c = min(remaining, Int(end - start))
      if c > 0 {
        let chunk = _loadPartialUnalignedUInt64LE(data, byteCount: c)
        combine(bytes: chunk, count: c)
        data += c
        remaining -= c
      }
    }
    _sanityCheck(
      remaining == 0 ||
      Int(bitPattern: data) & (MemoryLayout<UInt64>.alignment - 1) == 0)

    // Load as many aligned words as there are in the input buffer
    while remaining >= MemoryLayout<UInt64>.size {
      combine(UInt64(littleEndian: data.load(as: UInt64.self)))
      data += MemoryLayout<UInt64>.size
      remaining -= MemoryLayout<UInt64>.size
    }

    // Load last partial word of data
    _sanityCheck(remaining >= 0 && remaining < 8)
    if remaining > 0 {
      let chunk = _loadPartialUnalignedUInt64LE(data, byteCount: remaining)
      combine(bytes: chunk, count: remaining)
    }
  }

  /// Finishes hashing and returns the 64-bit hash value.
  ///
  /// The buffer's packed tail and byte count are handed to the core's
  /// finalization step, and the buffer is then marked as finalized.
  @inline(__always)
  internal mutating func finalize() -> UInt64 {
    precondition(!_buffer.isFinalized)
    let hash = _core.finalize(tailAndByteCount: _buffer.value)
    _buffer.finalize()
    return hash
  }
}
248+
249+
/// Swift's standard hash function: a thin public wrapper around a buffering
/// hasher built on `_SipHash13Core`.
@_fixed_layout // FIXME: Should be resilient (rdar://problem/38549901)
public struct _Hasher {
  internal typealias Core = _BufferingHasher<_SipHash13Core>

  private var _core: Core

  /// Creates a hasher seeded with `_Hasher._seed`.
  @effects(releasenone)
  public init() {
    _core = Core(seed: _Hasher._seed)
  }

  /// Creates a hasher seeded with the given two-word seed.
  @usableFromInline
  @effects(releasenone)
  internal init(_seed seed: (UInt64, UInt64)) {
    _core = Core(seed: seed)
  }

  /// Whether hashing must be deterministic in the current environment.
  ///
  /// When this is true, the hash seed is not random and hash tables do not
  /// apply per-instance perturbation that is not repeatable. This is not
  /// recommended for production use, but it helps in test environments where
  /// randomization would make test results nondeterministic.
  public // SPI
  static var _isDeterministic: Bool {
    @inlinable
    @inline(__always)
    get {
      return _swift_stdlib_Hashing_parameters.deterministic
    }
  }

  /// The 128-bit seed used to initialize the hasher state. It is initialized
  /// once, during process startup.
  public // SPI
  static var _seed: (UInt64, UInt64) {
    @inlinable
    @inline(__always)
    get {
      // The seed lives in C++ code so that static construction initializes
      // it. Almost every Swift program uses hash tables, so paying for the
      // seed at startup seems to be the right trade-off.
      let seed0 = _swift_stdlib_Hashing_parameters.seed0
      let seed1 = _swift_stdlib_Hashing_parameters.seed1
      return (seed0, seed1)
    }
  }

  /// Combines a hashable value by asking it to hash itself into this hasher.
  @inlinable
  @inline(__always)
  public mutating func combine<H: Hashable>(_ value: H) {
    value._hash(into: &self)
  }

  /// Combines a word-sized unsigned integer.
  // FIXME: Convert to @usableFromInline internal once integers hash correctly.
  @effects(releasenone)
  public mutating func _combine(_ value: UInt) {
    _core.combine(value)
  }

  /// Combines a 64-bit unsigned integer.
  // FIXME: Convert to @usableFromInline internal once integers hash correctly.
  @effects(releasenone)
  public mutating func _combine(_ value: UInt64) {
    _core.combine(value)
  }

  /// Combines a 32-bit unsigned integer.
  // FIXME: Convert to @usableFromInline internal once integers hash correctly.
  @effects(releasenone)
  public mutating func _combine(_ value: UInt32) {
    _core.combine(value)
  }

  /// Combines a 16-bit unsigned integer.
  // FIXME: Convert to @usableFromInline internal once integers hash correctly.
  @effects(releasenone)
  public mutating func _combine(_ value: UInt16) {
    _core.combine(value)
  }

  /// Combines an 8-bit unsigned integer.
  // FIXME: Convert to @usableFromInline internal once integers hash correctly.
  @effects(releasenone)
  public mutating func _combine(_ value: UInt8) {
    _core.combine(value)
  }

  /// Combines the low-order `count` bytes of `value`.
  @effects(releasenone)
  public mutating func _combine(bytes value: UInt64, count: Int) {
    _core.combine(bytes: value, count: count)
  }

  /// Combines every byte in the given raw buffer.
  @effects(releasenone)
  public mutating func combine(bytes: UnsafeRawBufferPointer) {
    _core.combine(bytes: bytes)
  }

  /// Finishes hashing and returns the result, truncated to `Int` if needed.
  @effects(releasenone)
  public mutating func finalize() -> Int {
    let hash64 = _core.finalize()
    return Int(truncatingIfNeeded: hash64)
  }
}

0 commit comments

Comments
 (0)