Skip to content

[stable/20250402] Bring back deterministic hashing #10715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 34 additions & 11 deletions llvm/include/llvm/ADT/Hashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
#ifndef LLVM_ADT_HASHING_H
#define LLVM_ADT_HASHING_H

#include "llvm/Config/abi-breaking.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SwapByteOrder.h"
Expand Down Expand Up @@ -127,6 +126,23 @@ hash_code hash_value(const std::basic_string<T> &arg);
/// Compute a hash_code for a standard string.
template <typename T> hash_code hash_value(const std::optional<T> &arg);

/// Override the execution seed with a fixed value.
///
/// This hashing library uses a per-execution seed designed to change on each
/// run with high probability in order to ensure that the hash codes are not
/// attackable and to ensure that output which is intended to be stable does
/// not rely on the particulars of the hash codes produced.
///
/// That said, there are use cases where it is important to be able to
/// reproduce *exactly* a specific behavior. To that end, we provide a function
/// which will forcibly set the seed to a fixed value. This must be done at the
/// start of the program, before any hashes are computed. Also, it cannot be
/// undone. This makes it thread-hostile and very hard to use outside of
/// immediately on start of a simple program designed for reproducible
/// behavior.
void set_fixed_execution_hash_seed(uint64_t fixed_value);


// All of the implementation details of actually computing the various hash
// code values are held within this namespace. These routines are included in
// the header file mainly to allow inlining and constant propagation.
Expand Down Expand Up @@ -306,17 +322,24 @@ struct hash_state {
}
};

/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
/// per process (address of a function in LLVMSupport) to prevent having users
/// depend on the particular hash values. On platforms without ASLR, this is
/// still likely non-deterministic per build.

/// A global, fixed seed-override variable.
///
/// This variable can be set using the \see llvm::set_fixed_execution_seed
/// function. See that function for details. Do not, under any circumstances,
/// set or read this variable.
extern uint64_t fixed_seed_override;

inline uint64_t get_execution_seed() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
return static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
#else
return 0xff51afd7ed558ccdULL;
#endif
// FIXME: This needs to be a per-execution seed. This is just a placeholder
// implementation. Switching to a per-execution seed is likely to flush out
// instability bugs and so will happen as its own commit.
//
// However, if there is a fixed seed override set the first time this is
// called, return that instead of the per-execution seed.
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
return seed;
}


Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ add_llvm_component_library(LLVMSupport
FormatVariadic.cpp
GlobPattern.cpp
GraphWriter.cpp
Hashing.cpp
HexagonAttributeParser.cpp
HexagonAttributes.cpp
InitLLVM.cpp
Expand Down
28 changes: 28 additions & 0 deletions llvm/lib/Support/Hashing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides implementation bits for the LLVM common hashing
// infrastructure. Documentation and most of the other information is in the
// header file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Hashing.h"

using namespace llvm;

// Provide a definition and static initializer for the fixed seed. This
// initializer should always be zero to ensure its value can never appear to be
// non-zero, even during dynamic initialization.
uint64_t llvm::hashing::detail::fixed_seed_override = 0;

// Implement the function for forced setting of the fixed seed.
// FIXME: Use atomic operations here so that there is no data race.
void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
hashing::detail::fixed_seed_override = fixed_value;
}
72 changes: 72 additions & 0 deletions llvm/unittests/ADT/HashingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,78 @@ TEST(HashingTest, HashCombineRangeLengthDiff) {
}
}

TEST(HashingTest, HashCombineRangeGoldenTest) {
struct { const char *s; uint64_t hash; } golden_data[] = {
#if SIZE_MAX == UINT64_MAX || SIZE_MAX == UINT32_MAX
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "ab", 0x7ab1edb96be496b4ULL },
{ "abc", 0xe38e60bf19c71a3fULL },
{ "abcde", 0xd24461a66de97f6eULL },
{ "abcdefgh", 0x4ef872ec411dec9dULL },
{ "abcdefghijklm", 0xe8a865539f4eadfeULL },
{ "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
{ "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
{ "abcdefghijklmnopqrstuvwxyzabcdef"
"abcdefghijklmnopqrstuvwxyzghijkl"
"abcdefghijklmnopqrstuvwxyzmnopqr"
"abcdefghijklmnopqrstuvwxyzstuvwx"
"abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "aa", 0xf2b3b69a9736a1ebULL },
{ "aaa", 0xf752eb6f07b1cafeULL },
{ "aaaaa", 0x812bd21e1236954cULL },
{ "aaaaaaaa", 0xff07a2cff08ac587ULL },
{ "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
{ "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
{ "z", 0x1ba160d7e8f8785cULL },
{ "zz", 0x2c5c03172f1285d7ULL },
{ "zzz", 0x9d2c4f4b507a2ac3ULL },
{ "zzzzz", 0x0f03b9031735693aULL },
{ "zzzzzzzz", 0xe674147c8582c08eULL },
{ "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
{ "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "ab", 0x7ab1edb96be496b4ULL },
{ "aba", 0x3edb049950884d0aULL },
{ "ababa", 0x8f2de9e73a97714bULL },
{ "abababab", 0xee14a29ddf0ce54cULL },
{ "ababababababa", 0x38b3ddaada2d52b4ULL },
{ "ababababababababababa", 0xd3665364219f2b85ULL },
{ "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
{ "abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab", 0x840192d129f7a22bULL }
#else
#error This test only supports 64-bit and 32-bit systems.
#endif
};
for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
StringRef str = golden_data[i].s;
hash_code hash = hash_combine_range(str.begin(), str.end());
#if 0 // Enable this to generate paste-able text for the above structure.
std::string member_str = "\"" + str.str() + "\",";
fprintf(stderr, " { %-35s 0x%016llxULL },\n",
member_str.c_str(), static_cast<uint64_t>(hash));
#endif
EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
static_cast<size_t>(hash));
}
}

TEST(HashingTest, HashCombineBasicTest) {
// Hashing a sequence of homogenous types matches range hashing.
const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;
Expand Down
1 change: 1 addition & 0 deletions llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ static_library("Support") {
"FormattedStream.cpp",
"GlobPattern.cpp",
"GraphWriter.cpp",
"Hashing.cpp",
"HexagonAttributeParser.cpp",
"HexagonAttributes.cpp",
"InitLLVM.cpp",
Expand Down