Skip to content

Commit 28375ee

Browse files
Merge pull request #10900 from AnthonyLatsis/stable/20250601
[stable/20250601] Bring back deterministic hashing
2 parents 9cc6817 + 28f01b4 commit 28375ee

File tree

5 files changed

+136
-11
lines changed

5 files changed

+136
-11
lines changed

llvm/include/llvm/ADT/Hashing.h

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
#define LLVM_ADT_HASHING_H
4646

4747
#include "llvm/ADT/ADL.h"
48-
#include "llvm/Config/abi-breaking.h"
4948
#include "llvm/Support/DataTypes.h"
5049
#include "llvm/Support/ErrorHandling.h"
5150
#include "llvm/Support/SwapByteOrder.h"
@@ -128,6 +127,23 @@ hash_code hash_value(const std::basic_string<T> &arg);
128127
/// Compute a hash_code for a std::optional.
129128
template <typename T> hash_code hash_value(const std::optional<T> &arg);
130129

130+
/// Override the execution seed with a fixed value.
131+
///
132+
/// This hashing library uses a per-execution seed designed to change on each
133+
/// run with high probability in order to ensure that the hash codes are not
134+
/// attackable and to ensure that output which is intended to be stable does
135+
/// not rely on the particulars of the hash codes produced.
136+
///
137+
/// That said, there are use cases where it is important to be able to
138+
/// reproduce *exactly* a specific behavior. To that end, we provide a function
139+
/// which will forcibly set the seed to a fixed value. This must be done at the
140+
/// start of the program, before any hashes are computed. Also, it cannot be
141+
/// undone. This makes it thread-hostile and very hard to use outside of
142+
/// immediately on start of a simple program designed for reproducible
143+
/// behavior.
144+
void set_fixed_execution_hash_seed(uint64_t fixed_value);
145+
146+
131147
// All of the implementation details of actually computing the various hash
132148
// code values are held within this namespace. These routines are included in
133149
// the header file mainly to allow inlining and constant propagation.
@@ -307,17 +323,24 @@ struct hash_state {
307323
}
308324
};
309325

310-
/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
311-
/// per process (address of a function in LLVMSupport) to prevent having users
312-
/// depend on the particular hash values. On platforms without ASLR, this is
313-
/// still likely non-deterministic per build.
326+
327+
/// A global, fixed seed-override variable.
328+
///
329+
/// This variable can be set using the \see llvm::set_fixed_execution_hash_seed
330+
/// function. See that function for details. Do not, under any circumstances,
331+
/// set or read this variable.
332+
extern uint64_t fixed_seed_override;
333+
314334
inline uint64_t get_execution_seed() {
315-
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
316-
return static_cast<uint64_t>(
317-
reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
318-
#else
319-
return 0xff51afd7ed558ccdULL;
320-
#endif
335+
// FIXME: This needs to be a per-execution seed. This is just a placeholder
336+
// implementation. Switching to a per-execution seed is likely to flush out
337+
// instability bugs and so will happen as its own commit.
338+
//
339+
// However, if there is a fixed seed override set the first time this is
340+
// called, return that instead of the per-execution seed.
341+
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
342+
static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
343+
return seed;
321344
}
322345

323346

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ add_llvm_component_library(LLVMSupport
199199
FormatVariadic.cpp
200200
GlobPattern.cpp
201201
GraphWriter.cpp
202+
Hashing.cpp
202203
HexagonAttributeParser.cpp
203204
HexagonAttributes.cpp
204205
InitLLVM.cpp

llvm/lib/Support/Hashing.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file provides implementation bits for the LLVM common hashing
10+
// infrastructure. Documentation and most of the other information is in the
11+
// header file.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "llvm/ADT/Hashing.h"
16+
17+
using namespace llvm;
18+
19+
// Provide a definition and static initializer for the fixed seed. This
20+
// initializer should always be zero to ensure its value can never appear to be
21+
// non-zero, even during dynamic initialization.
22+
uint64_t llvm::hashing::detail::fixed_seed_override = 0;
23+
24+
// Implement the function for forced setting of the fixed seed.
25+
// FIXME: Use atomic operations here so that there is no data race.
26+
void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
27+
hashing::detail::fixed_seed_override = fixed_value;
28+
}

llvm/unittests/ADT/HashingTest.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,78 @@ TEST(HashingTest, HashCombineRangeLengthDiff) {
239239
}
240240
}
241241

242+
TEST(HashingTest, HashCombineRangeGoldenTest) {
243+
struct { const char *s; uint64_t hash; } golden_data[] = {
244+
#if SIZE_MAX == UINT64_MAX || SIZE_MAX == UINT32_MAX
245+
{ "a", 0xaeb6f9d5517c61f8ULL },
246+
{ "ab", 0x7ab1edb96be496b4ULL },
247+
{ "abc", 0xe38e60bf19c71a3fULL },
248+
{ "abcde", 0xd24461a66de97f6eULL },
249+
{ "abcdefgh", 0x4ef872ec411dec9dULL },
250+
{ "abcdefghijklm", 0xe8a865539f4eadfeULL },
251+
{ "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
252+
{ "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
253+
{ "abcdefghijklmnopqrstuvwxyzabcdef"
254+
"abcdefghijklmnopqrstuvwxyzghijkl"
255+
"abcdefghijklmnopqrstuvwxyzmnopqr"
256+
"abcdefghijklmnopqrstuvwxyzstuvwx"
257+
"abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
258+
{ "a", 0xaeb6f9d5517c61f8ULL },
259+
{ "aa", 0xf2b3b69a9736a1ebULL },
260+
{ "aaa", 0xf752eb6f07b1cafeULL },
261+
{ "aaaaa", 0x812bd21e1236954cULL },
262+
{ "aaaaaaaa", 0xff07a2cff08ac587ULL },
263+
{ "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
264+
{ "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
265+
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
266+
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
267+
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
268+
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
269+
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
270+
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
271+
{ "z", 0x1ba160d7e8f8785cULL },
272+
{ "zz", 0x2c5c03172f1285d7ULL },
273+
{ "zzz", 0x9d2c4f4b507a2ac3ULL },
274+
{ "zzzzz", 0x0f03b9031735693aULL },
275+
{ "zzzzzzzz", 0xe674147c8582c08eULL },
276+
{ "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
277+
{ "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
278+
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
279+
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
280+
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
281+
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
282+
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
283+
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
284+
{ "a", 0xaeb6f9d5517c61f8ULL },
285+
{ "ab", 0x7ab1edb96be496b4ULL },
286+
{ "aba", 0x3edb049950884d0aULL },
287+
{ "ababa", 0x8f2de9e73a97714bULL },
288+
{ "abababab", 0xee14a29ddf0ce54cULL },
289+
{ "ababababababa", 0x38b3ddaada2d52b4ULL },
290+
{ "ababababababababababa", 0xd3665364219f2b85ULL },
291+
{ "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
292+
{ "abababababababababababababababab"
293+
"abababababababababababababababab"
294+
"abababababababababababababababab"
295+
"abababababababababababababababab"
296+
"abababababababababababababababab", 0x840192d129f7a22bULL }
297+
#else
298+
#error This test only supports 64-bit and 32-bit systems.
299+
#endif
300+
};
301+
for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
302+
StringRef str = golden_data[i].s;
303+
hash_code hash = hash_combine_range(str.begin(), str.end());
304+
#if 0 // Enable this to generate paste-able text for the above structure.
305+
std::string member_str = "\"" + str.str() + "\",";
306+
fprintf(stderr, " { %-35s 0x%016llxULL },\n",
307+
member_str.c_str(), static_cast<uint64_t>(hash));
308+
#endif
309+
EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
310+
static_cast<size_t>(hash));
311+
}
312+
}
313+
242314
TEST(HashingTest, HashCombineBasicTest) {
243315
// Hashing a sequence of homogeneous types matches range hashing.
244316
const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;

llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ static_library("Support") {
9494
"FormattedStream.cpp",
9595
"GlobPattern.cpp",
9696
"GraphWriter.cpp",
97+
"Hashing.cpp",
9798
"HexagonAttributeParser.cpp",
9899
"HexagonAttributes.cpp",
99100
"InitLLVM.cpp",

0 commit comments

Comments
 (0)