Skip to content

Commit e715fc6

Browse files
committed
[StructuralHash] Refactor
- Use stable_hash instead of uint64_t. - Rename the update* functions to hash*; they now compute a stable_hash locally and return it.
1 parent 173c682 commit e715fc6

File tree

3 files changed

+88
-53
lines changed

3 files changed

+88
-53
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
#ifndef LLVM_IR_STRUCTURALHASH_H
1515
#define LLVM_IR_STRUCTURALHASH_H
1616

17+
#include "llvm/ADT/StableHashing.h"
1718
#include <cstdint>
1819

1920
namespace llvm {
2021

2122
class Function;
2223
class Module;
2324

24-
using IRHash = uint64_t;
25+
using IRHash = stable_hash;
2526

2627
/// Returns a hash of the function \p F.
2728
/// \param F The function to hash.

llvm/lib/IR/StructuralHash.cpp

Lines changed: 78 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -24,61 +24,86 @@ namespace {
2424
// by the MergeFunctions pass.
2525

2626
class StructuralHashImpl {
27-
uint64_t Hash = 4;
27+
stable_hash Hash = 4;
2828

29-
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
29+
bool DetailedHash;
3030

3131
// This will produce different values on 32-bit and 64-bit systems as
3232
// hash_combine returns a size_t. However, this is only used for
3333
// detailed hashing which, in-tree, only needs to distinguish between
3434
// differences in functions.
35-
template <typename T> void hashArbitaryType(const T &V) {
36-
hash(hash_combine(V));
35+
// TODO: This is not stable.
36+
template <typename T> stable_hash hashArbitaryType(const T &V) {
37+
return hash_combine(V);
3738
}
3839

39-
void hashType(Type *ValueType) {
40-
hash(ValueType->getTypeID());
40+
stable_hash hashType(Type *ValueType) {
41+
SmallVector<stable_hash> Hashes;
42+
Hashes.emplace_back(ValueType->getTypeID());
4143
if (ValueType->isIntegerTy())
42-
hash(ValueType->getIntegerBitWidth());
44+
Hashes.emplace_back(ValueType->getIntegerBitWidth());
45+
return stable_hash_combine(Hashes);
4346
}
4447

4548
public:
46-
StructuralHashImpl() = default;
47-
48-
void updateOperand(Value *Operand) {
49-
hashType(Operand->getType());
50-
51-
// The cases enumerated below are not exhaustive and are only aimed to
52-
// get decent coverage over the function.
53-
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54-
hashArbitaryType(ConstInt->getValue());
55-
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56-
hashArbitaryType(ConstFP->getValue());
57-
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58-
hash(Arg->getArgNo());
59-
} else if (Function *Func = dyn_cast<Function>(Operand)) {
49+
StructuralHashImpl() = delete;
50+
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
51+
52+
stable_hash hashConstant(Constant *C) {
53+
SmallVector<stable_hash> Hashes;
54+
// TODO: hashArbitaryType() is not stable.
55+
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
56+
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
57+
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
58+
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
59+
} else if (Function *Func = dyn_cast<Function>(C))
6060
// Hashing the name will be deterministic as LLVM's hashing infrastructure
6161
// has explicit support for hashing strings and will not simply hash
6262
// the pointer.
63-
hashArbitaryType(Func->getName());
64-
}
63+
Hashes.emplace_back(hashArbitaryType(Func->getName()));
64+
65+
return stable_hash_combine(Hashes);
66+
}
67+
68+
stable_hash hashValue(Value *V) {
69+
// Check constant and return its hash.
70+
Constant *C = dyn_cast<Constant>(V);
71+
if (C)
72+
return hashConstant(C);
73+
74+
// Hash argument number.
75+
SmallVector<stable_hash> Hashes;
76+
if (Argument *Arg = dyn_cast<Argument>(V))
77+
Hashes.emplace_back(Arg->getArgNo());
78+
79+
return stable_hash_combine(Hashes);
6580
}
6681

67-
void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68-
hash(Inst.getOpcode());
82+
stable_hash hashOperand(Value *Operand) {
83+
SmallVector<stable_hash> Hashes;
84+
Hashes.emplace_back(hashType(Operand->getType()));
85+
Hashes.emplace_back(hashValue(Operand));
86+
return stable_hash_combine(Hashes);
87+
}
88+
89+
stable_hash hashInstruction(const Instruction &Inst) {
90+
SmallVector<stable_hash> Hashes;
91+
Hashes.emplace_back(Inst.getOpcode());
6992

7093
if (!DetailedHash)
71-
return;
94+
return stable_hash_combine(Hashes);
7295

73-
hashType(Inst.getType());
96+
Hashes.emplace_back(hashType(Inst.getType()));
7497

7598
// Handle additional properties of specific instructions that cause
7699
// semantic differences in the IR.
77100
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78-
hash(ComparisonInstruction->getPredicate());
101+
Hashes.emplace_back(ComparisonInstruction->getPredicate());
79102

80103
for (const auto &Op : Inst.operands())
81-
updateOperand(Op);
104+
Hashes.emplace_back(hashOperand(Op));
105+
106+
return stable_hash_combine(Hashes);
82107
}
83108

84109
// A function hash is calculated by considering only the number of arguments
@@ -97,15 +122,17 @@ class StructuralHashImpl {
97122
// expensive checks for pass modification status). When modifying this
98123
// function, most changes should be gated behind an option and enabled
99124
// selectively.
100-
void update(const Function &F, bool DetailedHash) {
125+
void update(const Function &F) {
101126
// Declarations don't affect analyses.
102127
if (F.isDeclaration())
103128
return;
104129

105-
hash(0x62642d6b6b2d6b72); // Function header
130+
SmallVector<stable_hash> Hashes;
131+
Hashes.emplace_back(Hash);
132+
Hashes.emplace_back(0x62642d6b6b2d6b72); // Function header
106133

107-
hash(F.isVarArg());
108-
hash(F.arg_size());
134+
Hashes.emplace_back(F.isVarArg());
135+
Hashes.emplace_back(F.arg_size());
109136

110137
SmallVector<const BasicBlock *, 8> BBs;
111138
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -121,14 +148,17 @@ class StructuralHashImpl {
121148
// This random value acts as a block header, as otherwise the partition of
122149
// opcodes into BBs wouldn't affect the hash, only the order of the
123150
// opcodes
124-
hash(45798);
151+
Hashes.emplace_back(45798);
125152
for (auto &Inst : *BB)
126-
updateInstruction(Inst, DetailedHash);
153+
Hashes.emplace_back(hashInstruction(Inst));
127154

128155
for (const BasicBlock *Succ : successors(BB))
129156
if (VisitedBBs.insert(Succ).second)
130157
BBs.push_back(Succ);
131158
}
159+
160+
// Update the combined hash in place.
161+
Hash = stable_hash_combine(Hashes);
132162
}
133163

134164
void update(const GlobalVariable &GV) {
@@ -137,15 +167,20 @@ class StructuralHashImpl {
137167
// we ignore anything with the `llvm.` prefix
138168
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
139169
return;
140-
hash(23456); // Global header
141-
hash(GV.getValueType()->getTypeID());
170+
SmallVector<stable_hash> Hashes;
171+
Hashes.emplace_back(Hash);
172+
Hashes.emplace_back(23456); // Global header
173+
Hashes.emplace_back(GV.getValueType()->getTypeID());
174+
175+
// Update the combined hash in place.
176+
Hash = stable_hash_combine(Hashes);
142177
}
143178

144-
void update(const Module &M, bool DetailedHash) {
179+
void update(const Module &M) {
145180
for (const GlobalVariable &GV : M.globals())
146181
update(GV);
147182
for (const Function &F : M)
148-
update(F, DetailedHash);
183+
update(F);
149184
}
150185

151186
uint64_t getHash() const { return Hash; }
@@ -154,13 +189,13 @@ class StructuralHashImpl {
154189
} // namespace
155190

156191
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
157-
StructuralHashImpl H;
158-
H.update(F, DetailedHash);
192+
StructuralHashImpl H(DetailedHash);
193+
H.update(F);
159194
return H.getHash();
160195
}
161196

162197
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
163-
StructuralHashImpl H;
164-
H.update(M, DetailedHash);
198+
StructuralHashImpl H(DetailedHash);
199+
H.update(M);
165200
return H.getHash();
166201
}

llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,14 @@ lpad:
6363
resume { ptr, i32 } zeroinitializer
6464
}
6565

66+
define i8 @call_with_same_range() {
67+
; CHECK-LABEL: @call_with_same_range
68+
; CHECK: tail call i8 @call_with_range
69+
bitcast i8 0 to i8
70+
%out = call i8 @dummy(), !range !0
71+
ret i8 %out
72+
}
73+
6674
define i8 @invoke_with_same_range() personality ptr undef {
6775
; CHECK-LABEL: @invoke_with_same_range()
6876
; CHECK: tail call i8 @invoke_with_range()
@@ -76,15 +84,6 @@ lpad:
7684
resume { ptr, i32 } zeroinitializer
7785
}
7886

79-
define i8 @call_with_same_range() {
80-
; CHECK-LABEL: @call_with_same_range
81-
; CHECK: tail call i8 @call_with_range
82-
bitcast i8 0 to i8
83-
%out = call i8 @dummy(), !range !0
84-
ret i8 %out
85-
}
86-
87-
8887
declare i8 @dummy();
8988
declare i32 @__gxx_personality_v0(...)
9089

0 commit comments

Comments
 (0)