Skip to content

Commit fb58d52

Browse files
committed
structural hash for global var
1 parent 5f881b7 commit fb58d52

File tree

5 files changed

+200
-10
lines changed

5 files changed

+200
-10
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ class Module;
3131
/// to true includes instruction and operand type information.
3232
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3333

34+
/// Returns a hash of the global variable \p G.
35+
stable_hash StructuralHash(const GlobalVariable &G);
36+
3437
/// Returns a hash of the module \p M by hashing all functions and global
3538
/// variables contained within. \param M The module to hash. \param DetailedHash
3639
/// Whether or not to encode additional information in the function hashes that

llvm/lib/CodeGen/MachineStableHash.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#include "llvm/CodeGen/Register.h"
2828
#include "llvm/Config/llvm-config.h"
2929
#include "llvm/IR/Constants.h"
30+
#include "llvm/IR/GlobalVariable.h"
31+
#include "llvm/IR/StructuralHash.h"
3032
#include "llvm/MC/MCSymbol.h"
3133
#include "llvm/Support/Alignment.h"
3234
#include "llvm/Support/ErrorHandling.h"
@@ -97,9 +99,14 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
9799
++StableHashBailingGlobalAddress;
98100
return 0;
99101
}
100-
auto Name = GV->getName();
101-
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
102-
stable_hash_name(Name), MO.getOffset());
102+
stable_hash GVHash = 0;
103+
if (auto *GVar = dyn_cast<GlobalVariable>(GV))
104+
GVHash = StructuralHash(*GVar);
105+
if (!GVHash)
106+
GVHash = stable_hash_name(GV->getName());
107+
108+
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash,
109+
MO.getOffset());
103110
}
104111

105112
case MachineOperand::MO_TargetIndex: {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class StructuralHashImpl {
4646
/// Assign a unique ID to each Value in the order they are first seen.
4747
DenseMap<const Value *, int> ValueToId;
4848

49-
stable_hash hashType(Type *ValueType) {
49+
static stable_hash hashType(Type *ValueType) {
5050
SmallVector<stable_hash> Hashes;
5151
Hashes.emplace_back(ValueType->getTypeID());
5252
if (ValueType->isIntegerTy())
@@ -65,19 +65,44 @@ class StructuralHashImpl {
6565
}
6666
}
6767

68-
stable_hash hashAPInt(const APInt &I) {
68+
static stable_hash hashAPInt(const APInt &I) {
6969
SmallVector<stable_hash> Hashes;
7070
Hashes.emplace_back(I.getBitWidth());
7171
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
7272
Hashes.append(RawVals.begin(), RawVals.end());
7373
return stable_hash_combine(Hashes);
7474
}
7575

76-
stable_hash hashAPFloat(const APFloat &F) {
76+
static stable_hash hashAPFloat(const APFloat &F) {
7777
return hashAPInt(F.bitcastToAPInt());
7878
}
7979

80-
stable_hash hashGlobalValue(const GlobalValue *GV) {
80+
/// Computes a stable hash for the global variable \p GVar.
///
/// The initializer's contents are hashed (via hashConstant) when either
///   * the variable's name starts with ".str", or
///   * the variable has a section whose name contains one of the entries
///     in SectionNames below.
/// In every other case, including when \p GVar has no initializer, the
/// result comes from hashGlobalValue(), i.e. it is derived from the name.
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81+
if (!GVar.hasInitializer())
82+
return hashGlobalValue(&GVar);
83+
84+
// Hash the contents of a string. The check is on the name prefix only, so
// it covers names such as `.str` and `.str.1`.
85+
if (GVar.getName().starts_with(".str"))
86+
return hashConstant(GVar.getInitializer());
87+
88+
// Hash structural contents of Objective-C metadata in specific sections.
// This can be extended to other metadata if needed.
89+
90+
static constexpr const char *SectionNames[] = {
91+
"__cfstring", "__cstring", "__objc_classrefs",
92+
"__objc_methname", "__objc_selrefs",
93+
};
94+
if (GVar.hasSection()) {
95+
StringRef SectionName = GVar.getSection();
96+
// Substring match: the section string may carry a segment prefix and
// attribute suffixes (e.g. "__DATA,__cfstring"), so equality is not used.
for (const char *Name : SectionNames) {
97+
if (SectionName.contains(Name))
98+
return hashConstant(GVar.getInitializer());
99+
}
100+
}
101+
102+
// No content-based rule matched; fall back to the name-based hash.
return hashGlobalValue(&GVar);
103+
}
104+
105+
static stable_hash hashGlobalValue(const GlobalValue *GV) {
81106
if (!GV->hasName())
82107
return 0;
83108
return stable_hash_name(GV->getName());
@@ -87,7 +112,7 @@ class StructuralHashImpl {
87112
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
88113
// we're interested in computing a hash rather than comparing two Constants.
89114
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
90-
stable_hash hashConstant(Constant *C) {
115+
static stable_hash hashConstant(const Constant *C) {
91116
SmallVector<stable_hash> Hashes;
92117

93118
Type *Ty = C->getType();
@@ -98,14 +123,21 @@ class StructuralHashImpl {
98123
return stable_hash_combine(Hashes);
99124
}
100125

126+
if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
127+
Hashes.emplace_back(hashGlobalVariable(*GVar));
128+
return stable_hash_combine(Hashes);
129+
}
130+
101131
if (auto *G = dyn_cast<GlobalValue>(C)) {
102132
Hashes.emplace_back(hashGlobalValue(G));
103133
return stable_hash_combine(Hashes);
104134
}
105135

106136
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
107-
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
108-
return stable_hash_combine(Hashes);
137+
if (Seq->isString()) {
138+
Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
139+
return stable_hash_combine(Hashes);
140+
}
109141
}
110142

111143
switch (C->getValueID()) {
@@ -266,6 +298,7 @@ class StructuralHashImpl {
266298
Hashes.emplace_back(Hash);
267299
Hashes.emplace_back(GlobalHeaderHash);
268300
Hashes.emplace_back(GV.getValueType()->getTypeID());
301+
Hashes.emplace_back(hashGlobalVariable(GV));
269302

270303
// Update the combined hash in place.
271304
Hash = stable_hash_combine(Hashes);
@@ -297,6 +330,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
297330
return H.getHash();
298331
}
299332

333+
// Hashes a single global variable by forwarding to the static helper
// StructuralHashImpl::hashGlobalVariable; no StructuralHashImpl instance is
// constructed for this overload.
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
334+
return StructuralHashImpl::hashGlobalVariable(GVar);
335+
}
336+
300337
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
301338
StructuralHashImpl H(DetailedHash);
302339
H.update(M);
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; This test verifies that global variables are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: rm -rf %t && split-file %s %t
6+
7+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/string.ll | FileCheck %s
8+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/ns-const.ll | FileCheck %s
9+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/objc-ref.ll | FileCheck %s
10+
11+
; CHECK: _f1.Tgm
12+
; CHECK: _f2.Tgm
13+
14+
;--- string.ll
15+
16+
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
17+
@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
18+
19+
declare noundef i32 @goo(ptr noundef)
20+
21+
define i32 @f1() {
22+
entry:
23+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str)
24+
%add = add nsw i32 %call, 1
25+
ret i32 %add
26+
}
27+
28+
define i32 @f2() {
29+
entry:
30+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1)
31+
%add = add nsw i32 %call, 1
32+
ret i32 %add
33+
}
34+
35+
;--- ns-const.ll
36+
37+
%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
38+
@__CFConstantStringClassReference = external global [0 x i32]
39+
@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
40+
@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
41+
42+
@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
43+
@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
44+
45+
declare noundef i32 @hoo(ptr noundef)
46+
47+
define i32 @f1() {
48+
entry:
49+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
50+
%add = sub nsw i32 %call, 1
51+
ret i32 %add
52+
}
53+
54+
define i32 @f2() {
55+
entry:
56+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
57+
%add = sub nsw i32 %call, 1
58+
ret i32 %add
59+
}
60+
61+
;--- objc-ref.ll
62+
63+
%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
64+
65+
@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
66+
@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
67+
@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
68+
69+
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
70+
@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
71+
72+
@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
73+
@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
74+
75+
define i32 @f1() {
76+
entry:
77+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
78+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
79+
%call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
80+
ret i32 %call
81+
}
82+
83+
declare ptr @objc_msgSend(ptr, ptr, ...)
84+
85+
define i32 @f2() {
86+
entry:
87+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
88+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
89+
%call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
90+
ret i32 %call
91+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; This test verifies that global variables are hashed based on their initial contents,
2+
; allowing them to be outlined even if they appear different due to their names.
3+
4+
; RUN: split-file %s %t
5+
6+
; Check if the outlined function is created locally.
7+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -aarch64-enable-collect-loh=false -filetype=obj %t/local-two.ll -o %t_write_base
8+
; RUN: llvm-objdump -d %t_write_base | FileCheck %s
9+
10+
; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
11+
12+
; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
13+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base
14+
; RUN: llvm-objdump -d %t_read_base | FileCheck %s
15+
16+
; The names of globals `.str` and `.str.4` are different, but their initial contents are identical.
17+
; The outlined function now starts with a reference to that global ("hello\00").
18+
; CHECK: _OUTLINED_FUNCTION_{{.*}}:
19+
; CHECK-NEXT: adrp x1
20+
; CHECK-NEXT: add x1, x1
21+
; CHECK-NEXT: mov w2
22+
; CHECK-NEXT: mov w3
23+
; CHECK-NEXT: mov w4
24+
; CHECK-NEXT: b
25+
26+
;--- local-two.ll
27+
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
28+
@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1
29+
@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1
30+
31+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
32+
define i32 @f1() minsize {
33+
entry:
34+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
35+
ret i32 %call
36+
}
37+
define i32 @f2() minsize {
38+
entry:
39+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
40+
ret i32 %call
41+
}
42+
43+
;--- local-one.ll
44+
@.str.3 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
45+
@.str.4 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
46+
47+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
48+
define i32 @f1() minsize {
49+
entry:
50+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3, ptr noundef nonnull @.str.4, i32 1, i32 2, i32 3)
51+
ret i32 %call
52+
}

0 commit comments

Comments
 (0)