Skip to content

Commit b667d16

Browse files
authored
[StructuralHash] Refactor (#112621)
This is largely NFC (no functional change), and it prepares for #112638.
- Use stable_hash instead of uint64_t.
- Rename update* to hash* functions. They compute stable_hash locally and return it.

This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent a917ae0 commit b667d16

File tree

4 files changed

+100
-63
lines changed

4 files changed

+100
-63
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,27 +14,26 @@
1414
#ifndef LLVM_IR_STRUCTURALHASH_H
1515
#define LLVM_IR_STRUCTURALHASH_H
1616

17+
#include "llvm/ADT/StableHashing.h"
1718
#include <cstdint>
1819

1920
namespace llvm {
2021

2122
class Function;
2223
class Module;
2324

24-
using IRHash = uint64_t;
25-
2625
/// Returns a hash of the function \p F.
2726
/// \param F The function to hash.
2827
/// \param DetailedHash Whether or not to encode additional information in the
2928
/// hash. The additional information added into the hash when this flag is set
3029
/// to true includes instruction and operand type information.
31-
IRHash StructuralHash(const Function &F, bool DetailedHash = false);
30+
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3231

3332
/// Returns a hash of the module \p M by hashing all functions and global
3433
/// variables contained within. \param M The module to hash. \param DetailedHash
3534
/// Whether or not to encode additional information in the function hashes that
3635
/// composed the module hash.
37-
IRHash StructuralHash(const Module &M, bool DetailedHash = false);
36+
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
3837

3938
} // end namespace llvm
4039

llvm/lib/IR/StructuralHash.cpp

Lines changed: 86 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -24,61 +24,93 @@ namespace {
2424
// by the MergeFunctions pass.
2525

2626
class StructuralHashImpl {
27-
uint64_t Hash = 4;
27+
stable_hash Hash = 4;
2828

29-
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
29+
bool DetailedHash;
30+
31+
// This random value acts as a block header, as otherwise the partition of
32+
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
33+
static constexpr stable_hash BlockHeaderHash = 45798;
34+
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
35+
static constexpr stable_hash GlobalHeaderHash = 23456;
3036

3137
// This will produce different values on 32-bit and 64-bit systems as
3238
// hash_combine returns a size_t. However, this is only used for
3339
// detailed hashing which, in-tree, only needs to distinguish between
3440
// differences in functions.
35-
template <typename T> void hashArbitaryType(const T &V) {
36-
hash(hash_combine(V));
41+
// TODO: This is not stable.
42+
template <typename T> stable_hash hashArbitaryType(const T &V) {
43+
return hash_combine(V);
3744
}
3845

39-
void hashType(Type *ValueType) {
40-
hash(ValueType->getTypeID());
46+
stable_hash hashType(Type *ValueType) {
47+
SmallVector<stable_hash> Hashes;
48+
Hashes.emplace_back(ValueType->getTypeID());
4149
if (ValueType->isIntegerTy())
42-
hash(ValueType->getIntegerBitWidth());
50+
Hashes.emplace_back(ValueType->getIntegerBitWidth());
51+
return stable_hash_combine(Hashes);
4352
}
4453

4554
public:
46-
StructuralHashImpl() = default;
47-
48-
void updateOperand(Value *Operand) {
49-
hashType(Operand->getType());
50-
51-
// The cases enumerated below are not exhaustive and are only aimed to
52-
// get decent coverage over the function.
53-
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54-
hashArbitaryType(ConstInt->getValue());
55-
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56-
hashArbitaryType(ConstFP->getValue());
57-
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58-
hash(Arg->getArgNo());
59-
} else if (Function *Func = dyn_cast<Function>(Operand)) {
55+
StructuralHashImpl() = delete;
56+
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
57+
58+
stable_hash hashConstant(Constant *C) {
59+
SmallVector<stable_hash> Hashes;
60+
// TODO: hashArbitaryType() is not stable.
61+
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
62+
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
63+
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
64+
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
65+
} else if (Function *Func = dyn_cast<Function>(C)) {
6066
// Hashing the name will be deterministic as LLVM's hashing infrastructure
6167
// has explicit support for hashing strings and will not simply hash
6268
// the pointer.
63-
hashArbitaryType(Func->getName());
69+
Hashes.emplace_back(hashArbitaryType(Func->getName()));
6470
}
71+
72+
return stable_hash_combine(Hashes);
73+
}
74+
75+
stable_hash hashValue(Value *V) {
76+
// Check constant and return its hash.
77+
Constant *C = dyn_cast<Constant>(V);
78+
if (C)
79+
return hashConstant(C);
80+
81+
// Hash argument number.
82+
SmallVector<stable_hash> Hashes;
83+
if (Argument *Arg = dyn_cast<Argument>(V))
84+
Hashes.emplace_back(Arg->getArgNo());
85+
86+
return stable_hash_combine(Hashes);
6587
}
6688

67-
void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68-
hash(Inst.getOpcode());
89+
stable_hash hashOperand(Value *Operand) {
90+
SmallVector<stable_hash> Hashes;
91+
Hashes.emplace_back(hashType(Operand->getType()));
92+
Hashes.emplace_back(hashValue(Operand));
93+
return stable_hash_combine(Hashes);
94+
}
95+
96+
stable_hash hashInstruction(const Instruction &Inst) {
97+
SmallVector<stable_hash> Hashes;
98+
Hashes.emplace_back(Inst.getOpcode());
6999

70100
if (!DetailedHash)
71-
return;
101+
return stable_hash_combine(Hashes);
72102

73-
hashType(Inst.getType());
103+
Hashes.emplace_back(hashType(Inst.getType()));
74104

75105
// Handle additional properties of specific instructions that cause
76106
// semantic differences in the IR.
77107
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78-
hash(ComparisonInstruction->getPredicate());
108+
Hashes.emplace_back(ComparisonInstruction->getPredicate());
79109

80110
for (const auto &Op : Inst.operands())
81-
updateOperand(Op);
111+
Hashes.emplace_back(hashOperand(Op));
112+
113+
return stable_hash_combine(Hashes);
82114
}
83115

84116
// A function hash is calculated by considering only the number of arguments
@@ -97,15 +129,17 @@ class StructuralHashImpl {
97129
// expensive checks for pass modification status). When modifying this
98130
// function, most changes should be gated behind an option and enabled
99131
// selectively.
100-
void update(const Function &F, bool DetailedHash) {
132+
void update(const Function &F) {
101133
// Declarations don't affect analyses.
102134
if (F.isDeclaration())
103135
return;
104136

105-
hash(0x62642d6b6b2d6b72); // Function header
137+
SmallVector<stable_hash> Hashes;
138+
Hashes.emplace_back(Hash);
139+
Hashes.emplace_back(FunctionHeaderHash);
106140

107-
hash(F.isVarArg());
108-
hash(F.arg_size());
141+
Hashes.emplace_back(F.isVarArg());
142+
Hashes.emplace_back(F.arg_size());
109143

110144
SmallVector<const BasicBlock *, 8> BBs;
111145
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -118,17 +152,17 @@ class StructuralHashImpl {
118152
while (!BBs.empty()) {
119153
const BasicBlock *BB = BBs.pop_back_val();
120154

121-
// This random value acts as a block header, as otherwise the partition of
122-
// opcodes into BBs wouldn't affect the hash, only the order of the
123-
// opcodes
124-
hash(45798);
155+
Hashes.emplace_back(BlockHeaderHash);
125156
for (auto &Inst : *BB)
126-
updateInstruction(Inst, DetailedHash);
157+
Hashes.emplace_back(hashInstruction(Inst));
127158

128159
for (const BasicBlock *Succ : successors(BB))
129160
if (VisitedBBs.insert(Succ).second)
130161
BBs.push_back(Succ);
131162
}
163+
164+
// Update the combined hash in place.
165+
Hash = stable_hash_combine(Hashes);
132166
}
133167

134168
void update(const GlobalVariable &GV) {
@@ -137,30 +171,35 @@ class StructuralHashImpl {
137171
// we ignore anything with the `.llvm` prefix
138172
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
139173
return;
140-
hash(23456); // Global header
141-
hash(GV.getValueType()->getTypeID());
174+
SmallVector<stable_hash> Hashes;
175+
Hashes.emplace_back(Hash);
176+
Hashes.emplace_back(GlobalHeaderHash);
177+
Hashes.emplace_back(GV.getValueType()->getTypeID());
178+
179+
// Update the combined hash in place.
180+
Hash = stable_hash_combine(Hashes);
142181
}
143182

144-
void update(const Module &M, bool DetailedHash) {
183+
void update(const Module &M) {
145184
for (const GlobalVariable &GV : M.globals())
146185
update(GV);
147186
for (const Function &F : M)
148-
update(F, DetailedHash);
187+
update(F);
149188
}
150189

151190
uint64_t getHash() const { return Hash; }
152191
};
153192

154193
} // namespace
155194

156-
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
157-
StructuralHashImpl H;
158-
H.update(F, DetailedHash);
195+
stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
196+
StructuralHashImpl H(DetailedHash);
197+
H.update(F);
159198
return H.getHash();
160199
}
161200

162-
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
163-
StructuralHashImpl H;
164-
H.update(M, DetailedHash);
201+
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
202+
StructuralHashImpl H(DetailedHash);
203+
H.update(M);
165204
return H.getHash();
166205
}

llvm/lib/Transforms/IPO/MergeFunctions.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -172,14 +172,14 @@ namespace {
172172

173173
class FunctionNode {
174174
mutable AssertingVH<Function> F;
175-
IRHash Hash;
175+
stable_hash Hash;
176176

177177
public:
178178
// Note the hash is recalculated potentially multiple times, but it is cheap.
179179
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
180180

181181
Function *getFunc() const { return F; }
182-
IRHash getHash() const { return Hash; }
182+
stable_hash getHash() const { return Hash; }
183183

184184
/// Replace the reference to the function F by the function G, assuming their
185185
/// implementations are equal.
@@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {
420420

421421
// All functions in the module, ordered by hash. Functions with a unique
422422
// hash value are easily eliminated.
423-
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
423+
std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
424424
for (Function &Func : M) {
425425
if (isEligibleForMerging(Func)) {
426426
HashedFuncs.push_back({StructuralHash(Func), &Func});

llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,14 @@ lpad:
6363
resume { ptr, i32 } zeroinitializer
6464
}
6565

66+
define i8 @call_with_same_range() {
67+
; CHECK-LABEL: @call_with_same_range
68+
; CHECK: tail call i8 @call_with_range
69+
bitcast i8 0 to i8
70+
%out = call i8 @dummy(), !range !0
71+
ret i8 %out
72+
}
73+
6674
define i8 @invoke_with_same_range() personality ptr undef {
6775
; CHECK-LABEL: @invoke_with_same_range()
6876
; CHECK: tail call i8 @invoke_with_range()
@@ -76,15 +84,6 @@ lpad:
7684
resume { ptr, i32 } zeroinitializer
7785
}
7886

79-
define i8 @call_with_same_range() {
80-
; CHECK-LABEL: @call_with_same_range
81-
; CHECK: tail call i8 @call_with_range
82-
bitcast i8 0 to i8
83-
%out = call i8 @dummy(), !range !0
84-
ret i8 %out
85-
}
86-
87-
8887
declare i8 @dummy();
8988
declare i32 @__gxx_personality_v0(...)
9089

0 commit comments

Comments
 (0)