Skip to content

Commit 64da0be

Browse files
Reland "[NFCi][MergeFunctions] Consolidate Hashing Functions"
This is a reland of 28134a2 which was reverted due to behavioral differences between 32 and 64 bit builds that have since been fixed. Differential Revision: https://reviews.llvm.org/D158217
1 parent 58696d2 commit 64da0be

File tree

5 files changed

+39
-84
lines changed

5 files changed

+39
-84
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ namespace llvm {
2121
class Function;
2222
class Module;
2323

24-
uint64_t StructuralHash(const Function &F);
25-
uint64_t StructuralHash(const Module &M);
24+
using IRHash = uint64_t;
25+
26+
IRHash StructuralHash(const Function &F);
27+
IRHash StructuralHash(const Module &M);
2628

2729
} // end namespace llvm
2830

llvm/include/llvm/Transforms/Utils/FunctionComparator.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,6 @@ class FunctionComparator {
9999
/// Test whether the two functions have equivalent behaviour.
100100
int compare();
101101

102-
/// Hash a function. Equivalent functions will have the same hash, and unequal
103-
/// functions will have different hashes with high probability.
104-
using FunctionHash = uint64_t;
105-
static FunctionHash functionHash(Function &);
106-
107102
protected:
108103
/// Start the comparison.
109104
void beginCompare() {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,31 +20,54 @@ namespace {
2020
// llvm/lib/Transforms/Utils/FunctionComparator.cpp
2121

2222
class StructuralHashImpl {
23-
hash_code Hash;
23+
uint64_t Hash;
2424

25-
template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); }
25+
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
2626

2727
public:
2828
StructuralHashImpl() : Hash(4) {}
2929

30+
// A function hash is calculated by considering only the number of arguments
31+
// and whether a function is varargs, the order of basic blocks (given by the
32+
// successors of each basic block in depth first order), and the order of
33+
// opcodes of each instruction within each of these basic blocks. This mirrors
34+
// the strategy FunctionComparator::compare() uses to compare functions by
35+
// walking the BBs in depth first order and comparing each instruction in
36+
// sequence. Because this hash currently does not look at the operands, it is
37+
// insensitive to things such as the target of calls and the constants used in
38+
// the function, which makes it useful when possibly merging functions which
39+
// are the same modulo constants and call targets.
40+
//
41+
// Note that different users of StructuralHash will want different behavior
42+
// out of it (i.e., MergeFunctions will want something different from PM
43+
// expensive checks for pass modification status). When modifying this
44+
// function, most changes should be gated behind an option and enabled
45+
// selectively.
3046
void update(const Function &F) {
3147
// Declarations don't affect analyses.
3248
if (F.isDeclaration())
3349
return;
3450

35-
hash(12345); // Function header
51+
hash(0x62642d6b6b2d6b72); // Function header
3652

3753
hash(F.isVarArg());
3854
hash(F.arg_size());
3955

4056
SmallVector<const BasicBlock *, 8> BBs;
4157
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
4258

59+
// Walk the blocks in the same order as
60+
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
61+
// function "structure." (BB and opcode sequence)
4362
BBs.push_back(&F.getEntryBlock());
4463
VisitedBBs.insert(BBs[0]);
4564
while (!BBs.empty()) {
4665
const BasicBlock *BB = BBs.pop_back_val();
47-
hash(45798); // Block header
66+
67+
// This random value acts as a block header, as otherwise the partition of
68+
// opcodes into BBs wouldn't affect the hash, only the order of the
69+
// opcodes
70+
hash(45798);
4871
for (auto &Inst : *BB)
4972
hash(Inst.getOpcode());
5073

@@ -79,13 +102,13 @@ class StructuralHashImpl {
79102

80103
} // namespace
81104

82-
uint64_t llvm::StructuralHash(const Function &F) {
105+
IRHash llvm::StructuralHash(const Function &F) {
83106
StructuralHashImpl H;
84107
H.update(F);
85108
return H.getHash();
86109
}
87110

88-
uint64_t llvm::StructuralHash(const Module &M) {
111+
IRHash llvm::StructuralHash(const Module &M) {
89112
StructuralHashImpl H;
90113
H.update(M);
91114
return H.getHash();

llvm/lib/Transforms/IPO/MergeFunctions.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
#include "llvm/IR/Instructions.h"
108108
#include "llvm/IR/IntrinsicInst.h"
109109
#include "llvm/IR/Module.h"
110+
#include "llvm/IR/StructuralHash.h"
110111
#include "llvm/IR/Type.h"
111112
#include "llvm/IR/Use.h"
112113
#include "llvm/IR/User.h"
@@ -171,15 +172,14 @@ namespace {
171172

172173
class FunctionNode {
173174
mutable AssertingVH<Function> F;
174-
FunctionComparator::FunctionHash Hash;
175+
IRHash Hash;
175176

176177
public:
177178
// Note the hash is recalculated potentially multiple times, but it is cheap.
178-
FunctionNode(Function *F)
179-
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
179+
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
180180

181181
Function *getFunc() const { return F; }
182-
FunctionComparator::FunctionHash getHash() const { return Hash; }
182+
IRHash getHash() const { return Hash; }
183183

184184
/// Replace the reference to the function F by the function G, assuming their
185185
/// implementations are equal.
@@ -390,11 +390,10 @@ bool MergeFunctions::runOnModule(Module &M) {
390390

391391
// All functions in the module, ordered by hash. Functions with a unique
392392
// hash value are easily eliminated.
393-
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
394-
HashedFuncs;
393+
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
395394
for (Function &Func : M) {
396395
if (isEligibleForMerging(Func)) {
397-
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
396+
HashedFuncs.push_back({StructuralHash(Func), &Func});
398397
}
399398
}
400399

llvm/lib/Transforms/Utils/FunctionComparator.cpp

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -958,67 +958,3 @@ int FunctionComparator::compare() {
958958
}
959959
return 0;
960960
}
961-
962-
namespace {
963-
964-
// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
965-
// hash of a sequence of 64bit ints, but the entire input does not need to be
966-
// available at once. This interface is necessary for functionHash because it
967-
// needs to accumulate the hash as the structure of the function is traversed
968-
// without saving these values to an intermediate buffer. This form of hashing
969-
// is not often needed, as usually the object to hash is just read from a
970-
// buffer.
971-
class HashAccumulator64 {
972-
uint64_t Hash;
973-
974-
public:
975-
// Initialize to random constant, so the state isn't zero.
976-
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
977-
978-
void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
979-
980-
// No finishing is required, because the entire hash value is used.
981-
uint64_t getHash() { return Hash; }
982-
};
983-
984-
} // end anonymous namespace
985-
986-
// A function hash is calculated by considering only the number of arguments and
987-
// whether a function is varargs, the order of basic blocks (given by the
988-
// successors of each basic block in depth first order), and the order of
989-
// opcodes of each instruction within each of these basic blocks. This mirrors
990-
// the strategy compare() uses to compare functions by walking the BBs in depth
991-
// first order and comparing each instruction in sequence. Because this hash
992-
// does not look at the operands, it is insensitive to things such as the
993-
// target of calls and the constants used in the function, which makes it useful
994-
// when possibly merging functions which are the same modulo constants and call
995-
// targets.
996-
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
997-
HashAccumulator64 H;
998-
H.add(F.isVarArg());
999-
H.add(F.arg_size());
1000-
1001-
SmallVector<const BasicBlock *, 8> BBs;
1002-
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
1003-
1004-
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
1005-
// accumulating the hash of the function "structure." (BB and opcode sequence)
1006-
BBs.push_back(&F.getEntryBlock());
1007-
VisitedBBs.insert(BBs[0]);
1008-
while (!BBs.empty()) {
1009-
const BasicBlock *BB = BBs.pop_back_val();
1010-
// This random value acts as a block header, as otherwise the partition of
1011-
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes
1012-
H.add(45798);
1013-
for (const auto &Inst : *BB) {
1014-
H.add(Inst.getOpcode());
1015-
}
1016-
const Instruction *Term = BB->getTerminator();
1017-
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
1018-
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
1019-
continue;
1020-
BBs.push_back(Term->getSuccessor(i));
1021-
}
1022-
}
1023-
return H.getHash();
1024-
}

0 commit comments

Comments
 (0)