Skip to content

Commit 6225d74

Browse files
committed
[StructuralHash] Support Differences
This computes a structural hash while allowing selected operands to be ignored, as decided by a caller-provided function. Instead of a single hash value, it returns a FunctionHashInfo, which includes the hash value, an instruction mapping, and a map that records the location of each ignored operand together with that operand's hash value.
1 parent e715fc6 commit 6225d74

File tree

3 files changed

+275
-14
lines changed

3 files changed

+275
-14
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
#ifndef LLVM_IR_STRUCTURALHASH_H
1515
#define LLVM_IR_STRUCTURALHASH_H
1616

17+
#include "llvm/ADT/MapVector.h"
1718
#include "llvm/ADT/StableHashing.h"
19+
#include "llvm/IR/Instruction.h"
1820
#include <cstdint>
1921

2022
namespace llvm {
@@ -23,6 +25,7 @@ class Function;
2325
class Module;
2426

2527
using IRHash = stable_hash;
28+
using OpndHash = stable_hash;
2629

2730
/// Returns a hash of the function \p F.
2831
/// \param F The function to hash.
@@ -37,6 +40,49 @@ IRHash StructuralHash(const Function &F, bool DetailedHash = false);
3740
/// composed the module hash.
3841
IRHash StructuralHash(const Module &M, bool DetailedHash = false);
3942

43+
/// A pair of an instruction index and an operand index.
44+
using IndexPair = std::pair<unsigned, unsigned>;
45+
46+
/// A map from an instruction index to an instruction pointer.
47+
using IndexInstrMap = MapVector<unsigned, Instruction *>;
48+
49+
/// A map from an IndexPair to an OpndHash.
50+
using IndexOperandHashMapType = DenseMap<IndexPair, OpndHash>;
51+
52+
/// A function that takes an instruction and an operand index and returns true
53+
/// if the operand should be ignored in the function hash computation.
54+
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;
55+
56+
struct FunctionHashInfo {
57+
/// A hash value representing the structural content of the function
58+
IRHash FunctionHash;
59+
/// A mapping from instruction indices to instruction pointers
60+
std::unique_ptr<IndexInstrMap> IndexInstruction;
61+
/// A mapping from pairs of instruction indices and operand indices
62+
/// to the hashes of the operands. This can be used to analyze or
63+
/// reconstruct the differences in ignored operands
64+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
65+
66+
FunctionHashInfo(IRHash FuntionHash,
67+
std::unique_ptr<IndexInstrMap> IndexInstruction,
68+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
69+
: FunctionHash(FuntionHash),
70+
IndexInstruction(std::move(IndexInstruction)),
71+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
72+
};
73+
74+
/// Computes a structural hash of a given function, considering the structure
75+
/// and content of the function's instructions while allowing for selective
76+
/// ignoring of certain operands based on custom criteria. This hash can be used
77+
/// to identify functions that are structurally similar or identical, which is
78+
/// useful in optimizations, deduplication, or analysis tasks.
79+
/// \param F The function to hash.
80+
/// \param IgnoreOp A callable that takes an instruction and an operand index,
81+
/// and returns true if the operand should be ignored in the hash computation.
82+
/// \return A FunctionHashInfo structure
83+
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
84+
IgnoreOperandFunc IgnoreOp);
85+
4086
} // end namespace llvm
4187

4288
#endif

llvm/lib/IR/StructuralHash.cpp

Lines changed: 174 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,19 @@ class StructuralHashImpl {
2828

2929
bool DetailedHash;
3030

31+
/// IgnoreOp is a function that returns true if the operand should be ignored.
32+
IgnoreOperandFunc IgnoreOp = nullptr;
33+
/// A mapping from instruction indices to instruction pointers.
34+
/// The index represents the position of an instruction based on the order in
35+
/// which it is first encountered.
36+
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
37+
/// A mapping from pairs of instruction indices and operand indices
38+
/// to the hashes of the operands.
39+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
40+
41+
/// Assign a unique ID to each Value in the order they are first seen.
42+
DenseMap<const Value *, int> ValueToId;
43+
3144
// This will produce different values on 32-bit and 64-bit systems as
3245
// hash_combine returns a size_t. However, this is only used for
3346
// detailed hashing which, in-tree, only needs to distinguish between
@@ -47,24 +60,140 @@ class StructuralHashImpl {
4760

4861
public:
4962
StructuralHashImpl() = delete;
50-
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
63+
explicit StructuralHashImpl(bool DetailedHash,
64+
IgnoreOperandFunc IgnoreOp = nullptr)
65+
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
66+
if (IgnoreOp) {
67+
IndexInstruction = std::make_unique<IndexInstrMap>();
68+
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
69+
}
70+
}
5171

52-
stable_hash hashConstant(Constant *C) {
72+
/// Hashes an APInt by combining its bit width with each of its raw data
/// words, taken in index order from getRawData().
stable_hash hashAPInt(const APInt &I) {
5373
SmallVector<stable_hash> Hashes;
54-
// TODO: hashArbitaryType() is not stable.
55-
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
56-
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
57-
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
58-
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
59-
} else if (Function *Func = dyn_cast<Function>(C))
60-
// Hashing the name will be deterministic as LLVM's hashing infrastructure
61-
// has explicit support for hashing strings and will not simply hash
62-
// the pointer.
63-
Hashes.emplace_back(hashArbitaryType(Func->getName()));
74+
Hashes.emplace_back(I.getBitWidth());
75+
for (unsigned J = 0; J < I.getNumWords(); ++J)
76+
Hashes.emplace_back((I.getRawData())[J]);
77+
return stable_hash_combine(Hashes);
78+
}
6479

80+
/// Hashes an APFloat. Four properties of its semantics (precision, maximum
/// exponent, minimum exponent, size in bits) are combined with the hash of
/// the value's bit pattern, in that order.
stable_hash hashAPFloat(const APFloat &F) {
  const fltSemantics &Sem = F.getSemantics();

  SmallVector<stable_hash> Parts;
  // Semantics first, so equal bit patterns of different formats differ.
  Parts.push_back(static_cast<stable_hash>(APFloat::semanticsPrecision(Sem)));
  Parts.push_back(
      static_cast<stable_hash>(APFloat::semanticsMaxExponent(Sem)));
  Parts.push_back(
      static_cast<stable_hash>(APFloat::semanticsMinExponent(Sem)));
  Parts.push_back(
      static_cast<stable_hash>(APFloat::semanticsSizeInBits(Sem)));
  // Then the payload itself.
  const APInt Bits = F.bitcastToAPInt();
  Parts.push_back(hashAPInt(Bits));

  return stable_hash_combine(Parts);
}
6790

91+
/// Hashes a global value by its name. A global without a name hashes to 0.
stable_hash hashGlobalValue(const GlobalValue *GV) {
  return GV->hasName() ? stable_hash_name(GV->getName())
                       : static_cast<stable_hash>(0);
}
96+
97+
// Compute a hash for a Constant. This function is logically similar to
98+
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
99+
// we're interested in computing a hash rather than comparing two Constants.
100+
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
101+
// Compute a hash for a Constant. This function is logically similar to
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
//
// The hash always starts with the hash of the constant's type; what is mixed
// in afterwards depends on the kind of constant. A constant kind that is not
// handled explicitly falls back to the type-only hash instead of aborting:
// a coarser hash is still a valid hash, whereas aborting would make hashing
// fail on otherwise valid input.
stable_hash hashConstant(Constant *C) {
  SmallVector<stable_hash> Hashes;

  Type *Ty = C->getType();
  Hashes.emplace_back(hashType(Ty));

  // Null values contribute only a fixed marker on top of the type.
  if (C->isNullValue()) {
    Hashes.emplace_back(static_cast<stable_hash>('N'));
    return stable_hash_combine(Hashes);
  }

  // Global values are hashed by name.
  if (const auto *G = dyn_cast<GlobalValue>(C)) {
    Hashes.emplace_back(hashGlobalValue(G));
    return stable_hash_combine(Hashes);
  }

  // Sequential constant data is hashed over its raw bytes.
  if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
    Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
    return stable_hash_combine(Hashes);
  }

  // Appends Count, then the hash of every operand of U in operand order.
  auto AppendOperandHashes = [&](uint64_t Count, const User *U) {
    Hashes.emplace_back(Count);
    for (const auto &Op : U->operands())
      Hashes.emplace_back(hashConstant(cast<Constant>(Op)));
  };

  switch (C->getValueID()) {
  case Value::UndefValueVal:
  case Value::PoisonValueVal:
  case Value::ConstantTokenNoneVal:
    // Nothing beyond the type distinguishes these.
    break;
  case Value::ConstantIntVal:
    Hashes.emplace_back(hashAPInt(cast<ConstantInt>(C)->getValue()));
    break;
  case Value::ConstantFPVal:
    Hashes.emplace_back(hashAPFloat(cast<ConstantFP>(C)->getValueAPF()));
    break;
  case Value::ConstantArrayVal:
    AppendOperandHashes(cast<ArrayType>(Ty)->getNumElements(),
                        cast<ConstantArray>(C));
    break;
  case Value::ConstantStructVal:
    AppendOperandHashes(cast<StructType>(Ty)->getNumElements(),
                        cast<ConstantStruct>(C));
    break;
  case Value::ConstantVectorVal:
    AppendOperandHashes(cast<FixedVectorType>(Ty)->getNumElements(),
                        cast<ConstantVector>(C));
    break;
  case Value::ConstantExprVal: {
    const ConstantExpr *E = cast<ConstantExpr>(C);
    AppendOperandHashes(E->getNumOperands(), E);
    break;
  }
  case Value::BlockAddressVal:
    // Only the enclosing function's name is hashed, not the block itself.
    Hashes.emplace_back(
        hashGlobalValue(cast<BlockAddress>(C)->getFunction()));
    break;
  case Value::DSOLocalEquivalentVal:
    Hashes.emplace_back(
        hashGlobalValue(cast<DSOLocalEquivalent>(C)->getGlobalValue()));
    break;
  default:
    // Any other constant kind: keep the type-only hash.
    break;
  }

  return stable_hash_combine(Hashes);
}
196+
68197
stable_hash hashValue(Value *V) {
69198
// Check constant and return its hash.
70199
Constant *C = dyn_cast<Constant>(V);
@@ -76,6 +205,10 @@ class StructuralHashImpl {
76205
if (Argument *Arg = dyn_cast<Argument>(V))
77206
Hashes.emplace_back(Arg->getArgNo());
78207

208+
// Get an index (an insertion order) for the non-constant value.
209+
auto I = ValueToId.insert({V, ValueToId.size()});
210+
Hashes.emplace_back(I.first->second);
211+
79212
return stable_hash_combine(Hashes);
80213
}
81214

@@ -100,8 +233,20 @@ class StructuralHashImpl {
100233
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
101234
Hashes.emplace_back(ComparisonInstruction->getPredicate());
102235

103-
for (const auto &Op : Inst.operands())
104-
Hashes.emplace_back(hashOperand(Op));
236+
unsigned InstIdx = 0;
237+
if (IndexInstruction) {
238+
InstIdx = IndexInstruction->size();
239+
IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)});
240+
}
241+
242+
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
243+
auto OpndHash = hashOperand(Op);
244+
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
245+
assert(IndexOperandHashMap);
246+
IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash});
247+
} else
248+
Hashes.emplace_back(OpndHash);
249+
}
105250

106251
return stable_hash_combine(Hashes);
107252
}
@@ -184,6 +329,12 @@ class StructuralHashImpl {
184329
}
185330

186331
uint64_t getHash() const { return Hash; }
332+
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
333+
return std::move(IndexInstruction);
334+
}
335+
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
336+
return std::move(IndexOperandHashMap);
337+
}
187338
};
188339

189340
} // namespace
@@ -199,3 +350,12 @@ IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
199350
H.update(M);
200351
return H.getHash();
201352
}
353+
354+
/// Hashes \p F with detailed hashing enabled, leaving out of the function
/// hash every operand for which \p IgnoreOp returns true.
///
/// \returns the function hash together with the instruction-index map and
/// the map of ignored-operand hashes. When \p IgnoreOp is empty, neither map
/// is allocated and both pointers in the result are null.
FunctionHashInfo
llvm::StructuralHashWithDifferences(const Function &F,
                                    IgnoreOperandFunc IgnoreOp) {
  // IgnoreOp is a by-value sink parameter; hand it on without another copy.
  StructuralHashImpl H(/*DetailedHash=*/true, std::move(IgnoreOp));
  H.update(F);
  return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
                          H.getIndexPairOpndHashMap());
}

llvm/unittests/IR/StructuralHashTest.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) {
239239
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
240240
EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
241241
}
242+
243+
// Two functions that differ only in their call target must hash differently
// by default, and identically once the call-target operand is ignored. The
// ignored operand must then be recorded, with differing per-operand hashes.
TEST(StructuralHashTest, Differences) {
  LLVMContext Ctx;
  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
                                            "  %c = add i64 %a, 1\n"
                                            "  %b = call i64 @f1(i64 %c)\n"
                                            "  ret i64 %b\n"
                                            "}\n"
                                            "declare i64 @f1(i64)");
  ASSERT_TRUE(M1 != nullptr);
  auto *F1 = M1->getFunction("f");
  ASSERT_NE(F1, nullptr);
  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n"
                                            "  %c = add i64 %a, 1\n"
                                            "  %b = call i64 @f2(i64 %c)\n"
                                            "  ret i64 %b\n"
                                            "}\n"
                                            "declare i64 @f2(i64)");
  ASSERT_TRUE(M2 != nullptr);
  auto *F2 = M2->getFunction("g");
  ASSERT_NE(F2, nullptr);

  // They are originally different when not ignoring any operand.
  EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true));
  EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash,
            StructuralHashWithDifferences(*F2, nullptr).FunctionHash);

  // When we ignore the call target f1 vs f2, they have the same hash.
  auto IgnoreOp = [](const Instruction *I, unsigned OpndIdx) {
    return I->getOpcode() == Instruction::Call && OpndIdx == 1;
  };
  auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp);
  auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp);
  EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);

  // Both maps are dereferenced below, so they must have been allocated.
  ASSERT_TRUE(FuncHashInfo1.IndexInstruction != nullptr);
  ASSERT_TRUE(FuncHashInfo2.IndexInstruction != nullptr);
  ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap != nullptr);
  ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap != nullptr);

  // There are 3 instructions in total.
  ASSERT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u);
  ASSERT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u);

  // Exactly one operand (the call target) has been ignored.
  EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u);
  EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u);

  // The index pair of instruction and operand (1, 1) is a key in the map.
  ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1}));
  ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1}));

  // The indexed instruction must be the call instruction as shown in the
  // IgnoreOp above.
  Instruction *Call1 = FuncHashInfo1.IndexInstruction->lookup(1);
  Instruction *Call2 = FuncHashInfo2.IndexInstruction->lookup(1);
  ASSERT_NE(Call1, nullptr);
  ASSERT_NE(Call2, nullptr);
  EXPECT_EQ(Call1->getOpcode(), Instruction::Call);
  EXPECT_EQ(Call2->getOpcode(), Instruction::Call);

  // The ignored operand hashes (for f1 vs. f2) are different.
  EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}),
            FuncHashInfo2.IndexOperandHashMap->lookup({1, 1}));
}
296+
242297
} // end anonymous namespace

0 commit comments

Comments
 (0)