Skip to content

Commit 7ff7df1

Browse files
Revert "[NFCi][MergeFunctions] Consolidate Hashing Functions"
This reverts commit 28134a2. This patch was causing build failures on multiple buildbots on 32-bit architectures. Reverting now so I can deboug out-of-trunk and resubmit later.
1 parent b41e75c commit 7ff7df1

File tree

5 files changed

+82
-37
lines changed

5 files changed

+82
-37
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,8 @@ namespace llvm {
2121
class Function;
2222
class Module;
2323

24-
using IRHash = uint64_t;
25-
26-
IRHash StructuralHash(const Function &F);
27-
IRHash StructuralHash(const Module &M);
24+
uint64_t StructuralHash(const Function &F);
25+
uint64_t StructuralHash(const Module &M);
2826

2927
} // end namespace llvm
3028

llvm/include/llvm/Transforms/Utils/FunctionComparator.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ class FunctionComparator {
9999
/// Test whether the two functions have equivalent behaviour.
100100
int compare();
101101

102+
/// Hash a function. Equivalent functions will have the same hash, and unequal
103+
/// functions will have different hashes with high probability.
104+
using FunctionHash = uint64_t;
105+
static FunctionHash functionHash(Function &);
106+
102107
protected:
103108
/// Start the comparison.
104109
void beginCompare() {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,47 +27,24 @@ class StructuralHashImpl {
2727
public:
2828
StructuralHashImpl() : Hash(4) {}
2929

30-
// A function hash is calculated by considering only the number of arguments
31-
// and whether a function is varargs, the order of basic blocks (given by the
32-
// successors of each basic block in depth first order), and the order of
33-
// opcodes of each instruction within each of these basic blocks. This mirrors
34-
// the strategy FunctionComparator::compare() uses to compare functions by
35-
// walking the BBs in depth first order and comparing each instruction in
36-
// sequence. Because this hash currently does not look at the operands, it is
37-
// insensitive to things such as the target of calls and the constants used in
38-
// the function, which makes it useful when possibly merging functions which
39-
// are the same modulo constants and call targets.
40-
//
41-
// Note that different users of StructuralHash will want different behavior
42-
// out of it (i.e., MergeFunctions will want something different from PM
43-
// expensive checks for pass modification status). When modifying this
44-
// function, most changes should be gated behind an option and enabled
45-
// selectively.
4630
void update(const Function &F) {
4731
// Declarations don't affect analyses.
4832
if (F.isDeclaration())
4933
return;
5034

51-
hash(0x6acaa36bef8325c5ULL); // Function header
35+
hash(12345); // Function header
5236

5337
hash(F.isVarArg());
5438
hash(F.arg_size());
5539

5640
SmallVector<const BasicBlock *, 8> BBs;
5741
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
5842

59-
// Walk the blocks in the same order as
60-
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
61-
// function "structure." (BB and opcode sequence)
6243
BBs.push_back(&F.getEntryBlock());
6344
VisitedBBs.insert(BBs[0]);
6445
while (!BBs.empty()) {
6546
const BasicBlock *BB = BBs.pop_back_val();
66-
67-
// This random value acts as a block header, as otherwise the partition of
68-
// opcodes into BBs wouldn't affect the hash, only the order of the
69-
// opcodes
70-
hash(45798);
47+
hash(45798); // Block header
7148
for (auto &Inst : *BB)
7249
hash(Inst.getOpcode());
7350

@@ -102,13 +79,13 @@ class StructuralHashImpl {
10279

10380
} // namespace
10481

105-
IRHash llvm::StructuralHash(const Function &F) {
82+
uint64_t llvm::StructuralHash(const Function &F) {
10683
StructuralHashImpl H;
10784
H.update(F);
10885
return H.getHash();
10986
}
11087

111-
IRHash llvm::StructuralHash(const Module &M) {
88+
uint64_t llvm::StructuralHash(const Module &M) {
11289
StructuralHashImpl H;
11390
H.update(M);
11491
return H.getHash();

llvm/lib/Transforms/IPO/MergeFunctions.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@
107107
#include "llvm/IR/Instructions.h"
108108
#include "llvm/IR/IntrinsicInst.h"
109109
#include "llvm/IR/Module.h"
110-
#include "llvm/IR/StructuralHash.h"
111110
#include "llvm/IR/Type.h"
112111
#include "llvm/IR/Use.h"
113112
#include "llvm/IR/User.h"
@@ -172,14 +171,15 @@ namespace {
172171

173172
class FunctionNode {
174173
mutable AssertingVH<Function> F;
175-
IRHash Hash;
174+
FunctionComparator::FunctionHash Hash;
176175

177176
public:
178177
// Note the hash is recalculated potentially multiple times, but it is cheap.
179-
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
178+
FunctionNode(Function *F)
179+
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
180180

181181
Function *getFunc() const { return F; }
182-
IRHash getHash() const { return Hash; }
182+
FunctionComparator::FunctionHash getHash() const { return Hash; }
183183

184184
/// Replace the reference to the function F by the function G, assuming their
185185
/// implementations are equal.
@@ -390,10 +390,11 @@ bool MergeFunctions::runOnModule(Module &M) {
390390

391391
// All functions in the module, ordered by hash. Functions with a unique
392392
// hash value are easily eliminated.
393-
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
393+
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
394+
HashedFuncs;
394395
for (Function &Func : M) {
395396
if (isEligibleForMerging(Func)) {
396-
HashedFuncs.push_back({StructuralHash(Func), &Func});
397+
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
397398
}
398399
}
399400

llvm/lib/Transforms/Utils/FunctionComparator.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,3 +958,67 @@ int FunctionComparator::compare() {
958958
}
959959
return 0;
960960
}
961+
962+
namespace {
963+
964+
// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
965+
// hash of a sequence of 64bit ints, but the entire input does not need to be
966+
// available at once. This interface is necessary for functionHash because it
967+
// needs to accumulate the hash as the structure of the function is traversed
968+
// without saving these values to an intermediate buffer. This form of hashing
969+
// is not often needed, as usually the object to hash is just read from a
970+
// buffer.
971+
class HashAccumulator64 {
972+
uint64_t Hash;
973+
974+
public:
975+
// Initialize to random constant, so the state isn't zero.
976+
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
977+
978+
void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
979+
980+
// No finishing is required, because the entire hash value is used.
981+
uint64_t getHash() { return Hash; }
982+
};
983+
984+
} // end anonymous namespace
985+
986+
// A function hash is calculated by considering only the number of arguments and
987+
// whether a function is varargs, the order of basic blocks (given by the
988+
// successors of each basic block in depth first order), and the order of
989+
// opcodes of each instruction within each of these basic blocks. This mirrors
990+
// the strategy compare() uses to compare functions by walking the BBs in depth
991+
// first order and comparing each instruction in sequence. Because this hash
992+
// does not look at the operands, it is insensitive to things such as the
993+
// target of calls and the constants used in the function, which makes it useful
994+
// when possibly merging functions which are the same modulo constants and call
995+
// targets.
996+
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
997+
HashAccumulator64 H;
998+
H.add(F.isVarArg());
999+
H.add(F.arg_size());
1000+
1001+
SmallVector<const BasicBlock *, 8> BBs;
1002+
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
1003+
1004+
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
1005+
// accumulating the hash of the function "structure." (BB and opcode sequence)
1006+
BBs.push_back(&F.getEntryBlock());
1007+
VisitedBBs.insert(BBs[0]);
1008+
while (!BBs.empty()) {
1009+
const BasicBlock *BB = BBs.pop_back_val();
1010+
// This random value acts as a block header, as otherwise the partition of
1011+
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes
1012+
H.add(45798);
1013+
for (const auto &Inst : *BB) {
1014+
H.add(Inst.getOpcode());
1015+
}
1016+
const Instruction *Term = BB->getTerminator();
1017+
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
1018+
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
1019+
continue;
1020+
BBs.push_back(Term->getSuccessor(i));
1021+
}
1022+
}
1023+
return H.getHash();
1024+
}

0 commit comments

Comments
 (0)