Skip to content

Commit 874dcae

Browse files
authored
[Serialization] Use stable hash functions
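clangSerialization currently uses hash_combine/hash_value from Hashing.h, which are not guaranteed to be deterministic across processes and are therefore unsuitable for hash values that are serialized to disk. Replace these uses with xxh3_64bits, which is stable. Pull Request: #96136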
1 parent 36c6632 commit 874dcae

File tree

4 files changed

+31
-11
lines changed

4 files changed

+31
-11
lines changed

clang/lib/AST/ODRHash.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ unsigned ODRHash::CalculateHash() {
251251

252252
assert(I == Bools.rend());
253253
Bools.clear();
254-
return ID.ComputeHash();
254+
return ID.computeStableHash();
255255
}
256256

257257
namespace {

clang/lib/Serialization/ASTReader.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,7 +1201,7 @@ unsigned DeclarationNameKey::getHash() const {
12011201
break;
12021202
}
12031203

1204-
return ID.ComputeHash();
1204+
return ID.computeStableHash();
12051205
}
12061206

12071207
ModuleFile *
@@ -2033,7 +2033,10 @@ const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
20332033
}
20342034

20352035
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
2036-
return llvm::hash_combine(ikey.Size, ikey.ModTime);
2036+
uint8_t buf[sizeof(ikey.Size) + sizeof(ikey.ModTime)];
2037+
memcpy(buf, &ikey.Size, sizeof(ikey.Size));
2038+
memcpy(buf + sizeof(ikey.Size), &ikey.ModTime, sizeof(ikey.ModTime));
2039+
return llvm::xxh3_64bits(buf);
20372040
}
20382041

20392042
HeaderFileInfoTrait::internal_key_type
@@ -2640,8 +2643,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
26402643
return OriginalChange;
26412644
}
26422645

2643-
// FIXME: hash_value is not guaranteed to be stable!
2644-
auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
2646+
auto ContentHash = xxh3_64bits(MemBuffOrError.get()->getBuffer());
26452647
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
26462648
return Change{Change::None};
26472649

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,7 +1782,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
17821782
.ValidateASTInputFilesContent) {
17831783
auto MemBuff = Cache->getBufferIfLoaded();
17841784
if (MemBuff)
1785-
ContentHash = hash_value(MemBuff->getBuffer());
1785+
ContentHash = xxh3_64bits(MemBuff->getBuffer());
17861786
else
17871787
PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
17881788
<< Entry.File.getName();
@@ -1987,7 +1987,10 @@ namespace {
19871987
// The hash is based only on size/time of the file, so that the reader can
19881988
// match even when symlinking or excess path elements ("foo/../", "../")
19891989
// change the form of the name. However, complete path is still the key.
1990-
return llvm::hash_combine(key.Size, key.ModTime);
1990+
uint8_t buf[sizeof(key.Size) + sizeof(key.ModTime)];
1991+
memcpy(buf, &key.Size, sizeof(key.Size));
1992+
memcpy(buf + sizeof(key.Size), &key.ModTime, sizeof(key.ModTime));
1993+
return llvm::xxh3_64bits(buf);
19911994
}
19921995

19931996
std::pair<unsigned, unsigned>

llvm/include/llvm/ADT/FoldingSet.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/SmallVector.h"
2222
#include "llvm/ADT/iterator.h"
2323
#include "llvm/Support/Allocator.h"
24+
#include "llvm/Support/xxhash.h"
2425
#include <cassert>
2526
#include <cstddef>
2627
#include <cstdint>
@@ -294,12 +295,19 @@ class FoldingSetNodeIDRef {
294295
FoldingSetNodeIDRef() = default;
295296
FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
296297

297-
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
298-
/// used to lookup the node in the FoldingSetBase.
298+
// Compute a strong hash value used to look up the node in the FoldingSetBase.
299+
// The hash value is not guaranteed to be deterministic across processes.
299300
unsigned ComputeHash() const {
300301
return static_cast<unsigned>(hash_combine_range(Data, Data + Size));
301302
}
302303

304+
// Compute a deterministic hash value across processes that is suitable for
305+
// on-disk serialization.
306+
unsigned computeStableHash() const {
307+
return static_cast<unsigned>(xxh3_64bits(ArrayRef(
308+
reinterpret_cast<const uint8_t *>(Data), sizeof(unsigned) * Size)));
309+
}
310+
303311
bool operator==(FoldingSetNodeIDRef) const;
304312

305313
bool operator!=(FoldingSetNodeIDRef RHS) const { return !(*this == RHS); }
@@ -366,12 +374,19 @@ class FoldingSetNodeID {
366374
/// object to be used to compute a new profile.
367375
inline void clear() { Bits.clear(); }
368376

369-
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
370-
/// to lookup the node in the FoldingSetBase.
377+
// Compute a strong hash value for this FoldingSetNodeID, used to look up the
378+
// node in the FoldingSetBase. The hash value is not guaranteed to be
379+
// deterministic across processes.
371380
unsigned ComputeHash() const {
372381
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
373382
}
374383

384+
// Compute a deterministic hash value across processes that is suitable for
385+
// on-disk serialization.
386+
unsigned computeStableHash() const {
387+
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).computeStableHash();
388+
}
389+
375390
/// operator== - Used to compare two nodes to each other.
376391
bool operator==(const FoldingSetNodeID &RHS) const;
377392
bool operator==(const FoldingSetNodeIDRef RHS) const;

0 commit comments

Comments
 (0)