Skip to content

Commit f5cb08e

Browse files
committed
Add a Stable Hash Algorithm
Use SipHash-2-4 to replace llvm::MD5 as the hashing implementation backing Fingerprints and the interface hash.
1 parent 31d43bf commit f5cb08e

File tree

4 files changed

+671
-0
lines changed

4 files changed

+671
-0
lines changed

include/swift/Basic/StableHasher.h

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
//===--- StableHasher.h - Stable Hashing ------------------------*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// An implementation of stable hashing for Swift.
14+
//
15+
// Derived from the reference implementation for SipHash 2-4:
16+
// https://github.com/veorq/SipHash
17+
//
18+
// With inline buffering derived from the hash implementation in the Swift
19+
// Standard Library.
20+
//
21+
//===----------------------------------------------------------------------===//
22+
23+
#ifndef SWIFT_BASIC_STABLEHASHER_H
24+
#define SWIFT_BASIC_STABLEHASHER_H
25+
26+
#include "llvm/Support/Endian.h"
27+
#include "llvm/ADT/StringRef.h"
28+
#include <algorithm>
29+
#include <cstring>
30+
#include <string>
31+
#include <vector>
32+
#include <tuple>
33+
#include <utility>
34+
35+
namespace swift {
36+
37+
/// A \c StableHasher is an implementation of a 128-bit stable hash - built for
38+
/// speed.
39+
///
40+
/// A "stable" hash in this context refers to the idea that the output of this
41+
/// hasher is deterministic across instantiations of the compiler. In order to
42+
/// support this goal, this hasher disallows run-dependent or otherwise
43+
/// unstable values from entering into the hash-combiner. For example, this
44+
/// hasher will statically reject attempts to hash-combine pointers and
45+
/// aggregates of pointers. Note that this relies on user cooperation as well.
46+
/// The order of hash-combines is pivotal, thus e.g. collection values should
47+
/// have a guaranteed order or be sorted before being hash-combined.
48+
///
49+
/// Stable hash values must also be independent of the host architecture. For
50+
/// integral types and enumerations, the default hash-combiner will
51+
/// automatically byte-swap to a common little-endian format.
52+
///
53+
/// This hasher also allows for extending the hash-combiner to user-defined
54+
/// types. To do so, define a (partial) specialization of
55+
/// \c swift::StableHasher::Combiner<T>
56+
///
57+
/// template <typename T>
58+
/// struct swift::StableHasher::Combiner<std::optional<T>> {
59+
/// static void combine(StableHasher &hasher, const std::optional<T> &O) {
60+
/// if (!O.has_value()) {
61+
/// hasher.combine(0);
62+
/// } else {
63+
/// hasher.combine(1);
64+
/// swift::StableHasher::Combiner<T>::combine(hasher, O.value());
65+
/// }
66+
/// }
67+
/// };
68+
///
69+
/// The current implementation is the 128-bit (extended) SipHash 2-4, which
70+
/// has been empirically demonstrated to have the best throughput relative to
71+
/// the other SipHash tunings.
72+
class StableHasher final {
73+
private:
74+
struct State {
75+
uint64_t v0 = 0x736F6D6570736575;
76+
uint64_t v1 = 0x646F72616E646f6D;
77+
uint64_t v2 = 0x6C7967656E657261;
78+
uint64_t v3 = 0x7465646279746573;
79+
} state;
80+
81+
// A buffer of up to 8 items that this hasher uses to amortize the cost
82+
// of the hashing function for hash-combines shorter than 64-bits.
83+
uint8_t byteBuffer[8] = {0};
84+
// msb lsb
85+
// +---------+-------+-------+-------+-------+-------+-------+-------+
86+
// |byteCount| length (<= 56 bits) |
87+
// +---------+-------+-------+-------+-------+-------+-------+-------+
88+
uint64_t lengthAndByteCount = 0;
89+
90+
public:
91+
static StableHasher defaultHasher() { StableHasher hasher{0, 0}; return hasher; }
92+
93+
explicit StableHasher(uint64_t leftSeed, uint64_t rightSeed) {
94+
state.v3 ^= rightSeed;
95+
state.v2 ^= leftSeed;
96+
state.v1 ^= rightSeed;
97+
state.v0 ^= leftSeed;
98+
99+
state.v1 ^= 0xEE;
100+
}
101+
102+
public:
103+
template <typename T> struct Combiner {
104+
// static void combine(StableHasher &hasher, const T &Val);
105+
};
106+
107+
public:
108+
/// Consume this stable hasher and compute the final 128-bit stable hash value.
109+
std::pair<uint64_t, uint64_t> finalize() &&;
110+
111+
template <uint64_t N> void combine(uint8_t (&bits)[N]) {
112+
static_assert(N > 0, "Cannot append VLA");
113+
static_assert(N <= 8, "Can only append up to 64 bits at a time");
114+
115+
lengthAndByteCount += N;
116+
117+
const uint64_t bufLen = getBufferLength();
118+
const uint64_t available = sizeof(byteBuffer) - bufLen;
119+
120+
// Cram as many bytes into the buffer as we can.
121+
const uint64_t nhead = std::min(N, available);
122+
if (nhead == sizeof(byteBuffer)) {
123+
// We have headroom available for all 64 bits. Eagerly compress the
124+
// now-full buffer into our state.
125+
std::copy(bits, bits + sizeof(byteBuffer), byteBuffer);
126+
} else if (N >= available) {
127+
// There was some excess - append as many bytes as we can hold and
128+
// compress the buffer into our state.
129+
std::copy(bits, bits + nhead, byteBuffer + bufLen);
130+
} else {
131+
// We have headroom available for these bits.
132+
std::copy(bits, bits + N, byteBuffer + bufLen);
133+
return setBufferLength(bufLen + N);
134+
}
135+
136+
constexpr auto endian = llvm::support::endianness::little;
137+
compress(llvm::support::endian::read<uint64_t>(byteBuffer, endian));
138+
139+
// Now reseed the buffer with the remaining bytes.
140+
const uint64_t remainder = N - available;
141+
std::copy(bits + available, bits + N, byteBuffer);
142+
return setBufferLength(remainder);
143+
}
144+
145+
template <
146+
typename T,
147+
typename std::enable_if<std::is_integral<T>::value>::type * = nullptr>
148+
void combine(T bits) {
149+
constexpr auto endian = llvm::support::endianness::little;
150+
uint8_t buf[sizeof(T)] = {0};
151+
bits = llvm::support::endian::byte_swap<T>(bits, endian);
152+
std::memcpy(buf, &bits, sizeof(T));
153+
combine<sizeof(T)>(buf);
154+
}
155+
156+
template <
157+
typename EnumType,
158+
typename std::enable_if<std::is_enum<EnumType>::value>::type * = nullptr>
159+
void combine(EnumType value) {
160+
using Underlying = typename std::underlying_type<EnumType>::type;
161+
return this->template combine<Underlying>(static_cast<Underlying>(value));
162+
}
163+
164+
template <typename T>
165+
auto combine(const T *ptr) -> decltype("Cannot hash-combine pointers!"){};
166+
167+
template <typename T, typename... Ts>
168+
void combine(const T &arg, const Ts &... args) {
169+
return combine_many(arg, args...);
170+
}
171+
172+
template <typename T, typename U> void combine(const std::pair<T, U> &arg) {
173+
return combine_many(arg.first, arg.second);
174+
}
175+
176+
template <typename T> void combine(const std::basic_string<T> &arg) {
177+
return combine_range(arg.begin(), arg.end());
178+
}
179+
180+
void combine(llvm::StringRef arg) {
181+
return combine_range(arg.begin(), arg.end());
182+
}
183+
184+
template <typename T,
185+
decltype(StableHasher::Combiner<T>::combine) * = nullptr>
186+
void combine(const T &val) {
187+
return StableHasher::Combiner<T>::combine(*this, val);
188+
}
189+
190+
template <typename ValueT> void combine_range(ValueT first, ValueT last) {
191+
combine(std::distance(first, last));
192+
while (first != last) {
193+
combine(*first++);
194+
}
195+
}
196+
197+
template <typename... Ts> void combine(const std::tuple<Ts...> &arg) {
198+
return combine_tuple(arg, typename std::index_sequence_for<Ts...>{});
199+
}
200+
201+
private:
202+
template <typename... Ts, unsigned... Indices>
203+
void combine_tuple(const std::tuple<Ts...> &arg,
204+
std::index_sequence<Indices...> indices) {
205+
return combine_many(hash_value(std::get<Indices>(arg))...);
206+
}
207+
208+
// base case.
209+
void combine_many() {}
210+
211+
// recursive case
212+
template <typename T, typename... Ts>
213+
void combine_many(const T &arg, const Ts &... args) {
214+
combine(arg);
215+
return combine_many(args...);
216+
}
217+
218+
private:
219+
/// Return the number of bytes in the inline buffer.
220+
uint64_t getBufferLength() const { return lengthAndByteCount >> 56; }
221+
/// Set the number of bytes in the inline buffer.
222+
void setBufferLength(uint64_t newLen) {
223+
lengthAndByteCount = getDigestLength() | (newLen << 56);
224+
}
225+
226+
/// Return the number of bytes that have been hash-combined so far.
227+
uint64_t getDigestLength() const {
228+
return lengthAndByteCount & ~(uint64_t(0xFF) << 56);
229+
}
230+
231+
void compress(uint64_t value);
232+
};
233+
234+
} // namespace swift
235+
236+
#endif // SWIFT_BASIC_STABLEHASHER_H

lib/Basic/StableHasher.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//===--- StableHasher.cpp - Stable Hasher ---------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "swift/Basic/StableHasher.h"
14+
15+
using namespace swift;
16+
17+
namespace {

/// Rotate the 64-bit \p value left by \p distance bits.
///
/// This replaces a function-like macro whose expansion was not wrapped in
/// parentheses and therefore changed meaning when used inside a larger
/// expression. Both shift amounts are masked so that a distance of 0 or 64
/// is well defined instead of shifting by the full width of the type.
constexpr uint64_t rotateLeft(uint64_t value, unsigned distance) {
  return (value << (distance & 63)) | (value >> ((64 - distance) & 63));
}

/// Apply one SipRound to the four state words, updating them in place.
///
/// The sequence of add / rotate / xor steps and the rotation distances
/// (13, 32, 16, 21, 17, 32) are fixed by the SipHash definition; the order
/// of the statements below must not be changed.
static inline void sip_round(uint64_t &v0, uint64_t &v1, uint64_t &v2,
                             uint64_t &v3) {
  v0 += v1;
  v1 = rotateLeft(v1, 13);
  v1 ^= v0;
  v0 = rotateLeft(v0, 32);
  v2 += v3;
  v3 = rotateLeft(v3, 16);
  v3 ^= v2;
  v0 += v3;
  v3 = rotateLeft(v3, 21);
  v3 ^= v0;
  v2 += v1;
  v1 = rotateLeft(v1, 17);
  v1 ^= v2;
  v2 = rotateLeft(v2, 32);
}

} // end anonymous namespace
39+
40+
void StableHasher::compress(uint64_t value) {
41+
state.v3 ^= value;
42+
for (unsigned i = 0; i < 2; ++i) {
43+
::sip_round(state.v0, state.v1, state.v2, state.v3);
44+
}
45+
state.v0 ^= value;
46+
}
47+
48+
std::pair<uint64_t, uint64_t> StableHasher::finalize() && {
49+
auto fillBitsFromBuffer = [](uint64_t fill, uint8_t *bits) {
50+
uint64_t head = 0;
51+
switch (fill) {
52+
case 7:
53+
head |= uint64_t(bits[6]) << 48;
54+
LLVM_FALLTHROUGH;
55+
case 6:
56+
head |= uint64_t(bits[5]) << 40;
57+
LLVM_FALLTHROUGH;
58+
case 5:
59+
head |= uint64_t(bits[4]) << 32;
60+
LLVM_FALLTHROUGH;
61+
case 4:
62+
head |= uint64_t(bits[3]) << 24;
63+
LLVM_FALLTHROUGH;
64+
case 3:
65+
head |= uint64_t(bits[2]) << 16;
66+
LLVM_FALLTHROUGH;
67+
case 2:
68+
head |= uint64_t(bits[1]) << 8;
69+
LLVM_FALLTHROUGH;
70+
case 1:
71+
head |= uint64_t(bits[0]);
72+
break;
73+
case 0:
74+
break;
75+
default:
76+
break;
77+
}
78+
return head;
79+
};
80+
81+
const uint64_t b = fillBitsFromBuffer(getBufferLength(), byteBuffer);
82+
compress(((getDigestLength() & 0xFF) << 56) | b);
83+
84+
state.v2 ^= 0xEE;
85+
86+
for (unsigned i = 0; i < 4; ++i) {
87+
::sip_round(state.v0, state.v1, state.v2, state.v3);
88+
}
89+
90+
const uint64_t h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
91+
92+
state.v1 ^= 0xDD;
93+
94+
for (unsigned i = 0; i < 4; ++i) {
95+
::sip_round(state.v0, state.v1, state.v2, state.v3);
96+
}
97+
98+
const uint64_t h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
99+
100+
return std::make_pair(h1, h2);
101+
}
102+

unittests/Basic/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ add_swift_unittest(SwiftBasicTests
2525
PrefixMapTest.cpp
2626
RangeTest.cpp
2727
SourceManagerTest.cpp
28+
StableHasher.cpp
2829
STLExtrasTest.cpp
2930
StringExtrasTest.cpp
3031
SuccessorMapTest.cpp

0 commit comments

Comments
 (0)