Skip to content

Commit 28134a2

Browse files
[NFCi][MergeFunctions] Consolidate Hashing Functions
A couple years ago, StructuralHash was created, copying the exact hashing implementation from FunctionComparator (minus a couple small details/refactorings). Since then, the hashing implementation has not diverged, but several other areas, like unit testing, have diverged significantly, with StructuralHash getting more attention in these areas. This patch aims to consolidate the two hashing functions into StructuralHash given they do the exact same thing and having less divergence in areas like unit testing would be beneficial. The original aim at creating a separate StructuralHash was to make the implementation divergent and capture additional details like instruction operands (which neither hashing implementation does currently). The MergeFunctions pass doesn't need these detaisl, but verification of pass return values would benefit from this additional data. Setting an option to calculate these values would allow for divergent behavior where appropriate while reducing code duplication with little runtime overhead. Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D158217
1 parent 4da76ea commit 28134a2

File tree

5 files changed

+37
-82
lines changed

5 files changed

+37
-82
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ namespace llvm {
2121
class Function;
2222
class Module;
2323

24-
uint64_t StructuralHash(const Function &F);
25-
uint64_t StructuralHash(const Module &M);
24+
using IRHash = uint64_t;
25+
26+
IRHash StructuralHash(const Function &F);
27+
IRHash StructuralHash(const Module &M);
2628

2729
} // end namespace llvm
2830

llvm/include/llvm/Transforms/Utils/FunctionComparator.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,6 @@ class FunctionComparator {
9999
/// Test whether the two functions have equivalent behaviour.
100100
int compare();
101101

102-
/// Hash a function. Equivalent functions will have the same hash, and unequal
103-
/// functions will have different hashes with high probability.
104-
using FunctionHash = uint64_t;
105-
static FunctionHash functionHash(Function &);
106-
107102
protected:
108103
/// Start the comparison.
109104
void beginCompare() {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,47 @@ class StructuralHashImpl {
2727
public:
2828
StructuralHashImpl() : Hash(4) {}
2929

30+
// A function hash is calculated by considering only the number of arguments
31+
// and whether a function is varargs, the order of basic blocks (given by the
32+
// successors of each basic block in depth first order), and the order of
33+
// opcodes of each instruction within each of these basic blocks. This mirrors
34+
// the strategy FunctionComparator::compare() uses to compare functions by
35+
// walking the BBs in depth first order and comparing each instruction in
36+
// sequence. Because this hash currently does not look at the operands, it is
37+
// insensitive to things such as the target of calls and the constants used in
38+
// the function, which makes it useful when possibly merging functions which
39+
// are the same modulo constants and call targets.
40+
//
41+
// Note that different users of StructuralHash will want different behavior
42+
// out of it (i.e., MergeFunctions will want something different from PM
43+
// expensive checks for pass modification status). When modifying this
44+
// function, most changes should be gated behind an option and enabled
45+
// selectively.
3046
void update(const Function &F) {
3147
// Declarations don't affect analyses.
3248
if (F.isDeclaration())
3349
return;
3450

35-
hash(12345); // Function header
51+
hash(0x6acaa36bef8325c5ULL); // Function header
3652

3753
hash(F.isVarArg());
3854
hash(F.arg_size());
3955

4056
SmallVector<const BasicBlock *, 8> BBs;
4157
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
4258

59+
// Walk the blocks in the same order as
60+
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
61+
// function "structure." (BB and opcode sequence)
4362
BBs.push_back(&F.getEntryBlock());
4463
VisitedBBs.insert(BBs[0]);
4564
while (!BBs.empty()) {
4665
const BasicBlock *BB = BBs.pop_back_val();
47-
hash(45798); // Block header
66+
67+
// This random value acts as a block header, as otherwise the partition of
68+
// opcodes into BBs wouldn't affect the hash, only the order of the
69+
// opcodes
70+
hash(45798);
4871
for (auto &Inst : *BB)
4972
hash(Inst.getOpcode());
5073

@@ -79,13 +102,13 @@ class StructuralHashImpl {
79102

80103
} // namespace
81104

82-
uint64_t llvm::StructuralHash(const Function &F) {
105+
IRHash llvm::StructuralHash(const Function &F) {
83106
StructuralHashImpl H;
84107
H.update(F);
85108
return H.getHash();
86109
}
87110

88-
uint64_t llvm::StructuralHash(const Module &M) {
111+
IRHash llvm::StructuralHash(const Module &M) {
89112
StructuralHashImpl H;
90113
H.update(M);
91114
return H.getHash();

llvm/lib/Transforms/IPO/MergeFunctions.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
#include "llvm/IR/Instructions.h"
108108
#include "llvm/IR/IntrinsicInst.h"
109109
#include "llvm/IR/Module.h"
110+
#include "llvm/IR/StructuralHash.h"
110111
#include "llvm/IR/Type.h"
111112
#include "llvm/IR/Use.h"
112113
#include "llvm/IR/User.h"
@@ -171,15 +172,14 @@ namespace {
171172

172173
class FunctionNode {
173174
mutable AssertingVH<Function> F;
174-
FunctionComparator::FunctionHash Hash;
175+
IRHash Hash;
175176

176177
public:
177178
// Note the hash is recalculated potentially multiple times, but it is cheap.
178-
FunctionNode(Function *F)
179-
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
179+
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
180180

181181
Function *getFunc() const { return F; }
182-
FunctionComparator::FunctionHash getHash() const { return Hash; }
182+
IRHash getHash() const { return Hash; }
183183

184184
/// Replace the reference to the function F by the function G, assuming their
185185
/// implementations are equal.
@@ -390,11 +390,10 @@ bool MergeFunctions::runOnModule(Module &M) {
390390

391391
// All functions in the module, ordered by hash. Functions with a unique
392392
// hash value are easily eliminated.
393-
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
394-
HashedFuncs;
393+
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
395394
for (Function &Func : M) {
396395
if (isEligibleForMerging(Func)) {
397-
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
396+
HashedFuncs.push_back({StructuralHash(Func), &Func});
398397
}
399398
}
400399

llvm/lib/Transforms/Utils/FunctionComparator.cpp

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -958,67 +958,3 @@ int FunctionComparator::compare() {
958958
}
959959
return 0;
960960
}
961-
962-
namespace {
963-
964-
// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
965-
// hash of a sequence of 64bit ints, but the entire input does not need to be
966-
// available at once. This interface is necessary for functionHash because it
967-
// needs to accumulate the hash as the structure of the function is traversed
968-
// without saving these values to an intermediate buffer. This form of hashing
969-
// is not often needed, as usually the object to hash is just read from a
970-
// buffer.
971-
class HashAccumulator64 {
972-
uint64_t Hash;
973-
974-
public:
975-
// Initialize to random constant, so the state isn't zero.
976-
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
977-
978-
void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
979-
980-
// No finishing is required, because the entire hash value is used.
981-
uint64_t getHash() { return Hash; }
982-
};
983-
984-
} // end anonymous namespace
985-
986-
// A function hash is calculated by considering only the number of arguments and
987-
// whether a function is varargs, the order of basic blocks (given by the
988-
// successors of each basic block in depth first order), and the order of
989-
// opcodes of each instruction within each of these basic blocks. This mirrors
990-
// the strategy compare() uses to compare functions by walking the BBs in depth
991-
// first order and comparing each instruction in sequence. Because this hash
992-
// does not look at the operands, it is insensitive to things such as the
993-
// target of calls and the constants used in the function, which makes it useful
994-
// when possibly merging functions which are the same modulo constants and call
995-
// targets.
996-
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
997-
HashAccumulator64 H;
998-
H.add(F.isVarArg());
999-
H.add(F.arg_size());
1000-
1001-
SmallVector<const BasicBlock *, 8> BBs;
1002-
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
1003-
1004-
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
1005-
// accumulating the hash of the function "structure." (BB and opcode sequence)
1006-
BBs.push_back(&F.getEntryBlock());
1007-
VisitedBBs.insert(BBs[0]);
1008-
while (!BBs.empty()) {
1009-
const BasicBlock *BB = BBs.pop_back_val();
1010-
// This random value acts as a block header, as otherwise the partition of
1011-
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes
1012-
H.add(45798);
1013-
for (const auto &Inst : *BB) {
1014-
H.add(Inst.getOpcode());
1015-
}
1016-
const Instruction *Term = BB->getTerminator();
1017-
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
1018-
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
1019-
continue;
1020-
BBs.push_back(Term->getSuccessor(i));
1021-
}
1022-
}
1023-
return H.getHash();
1024-
}

0 commit comments

Comments
 (0)