Skip to content

Commit 4f41862

Browse files
committed
Reapply "[StructuralHash] Global Variable (#118412)"
This reverts commit 6a0d6fc.
1 parent bc66e9a commit 4f41862

File tree

7 files changed

+243
-13
lines changed

7 files changed

+243
-13
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ class Module;
3131
/// to true includes instruction and operand type information.
3232
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3333

34+
/// Returns a hash of the global variable \p G.
35+
stable_hash StructuralHash(const GlobalVariable &G);
36+
3437
/// Returns a hash of the module \p M by hashing all functions and global
3538
/// variables contained within. \param M The module to hash. \param DetailedHash
3639
/// Whether or not to encode additional information in the function hashes that

llvm/lib/CodeGen/MachineStableHash.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#include "llvm/CodeGen/Register.h"
2828
#include "llvm/Config/llvm-config.h"
2929
#include "llvm/IR/Constants.h"
30+
#include "llvm/IR/GlobalVariable.h"
31+
#include "llvm/IR/StructuralHash.h"
3032
#include "llvm/MC/MCSymbol.h"
3133
#include "llvm/Support/Alignment.h"
3234
#include "llvm/Support/ErrorHandling.h"
@@ -93,13 +95,19 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
9395
return 0;
9496
case MachineOperand::MO_GlobalAddress: {
9597
const GlobalValue *GV = MO.getGlobal();
96-
if (!GV->hasName()) {
97-
++StableHashBailingGlobalAddress;
98-
return 0;
98+
stable_hash GVHash = 0;
99+
if (auto *GVar = dyn_cast<GlobalVariable>(GV))
100+
GVHash = StructuralHash(*GVar);
101+
if (!GVHash) {
102+
if (!GV->hasName()) {
103+
++StableHashBailingGlobalAddress;
104+
return 0;
105+
}
106+
GVHash = stable_hash_name(GV->getName());
99107
}
100-
auto Name = GV->getName();
101-
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
102-
stable_hash_name(Name), MO.getOffset());
108+
109+
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash,
110+
MO.getOffset());
103111
}
104112

105113
case MachineOperand::MO_TargetIndex: {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class StructuralHashImpl {
4646
/// Assign a unique ID to each Value in the order they are first seen.
4747
DenseMap<const Value *, int> ValueToId;
4848

49-
stable_hash hashType(Type *ValueType) {
49+
static stable_hash hashType(Type *ValueType) {
5050
SmallVector<stable_hash> Hashes;
5151
Hashes.emplace_back(ValueType->getTypeID());
5252
if (ValueType->isIntegerTy())
@@ -65,19 +65,47 @@ class StructuralHashImpl {
6565
}
6666
}
6767

68-
stable_hash hashAPInt(const APInt &I) {
68+
/// Returns a stable hash of the arbitrary-precision integer \p I.
/// The hash combines the bit width with every raw 64-bit word of the value,
/// so the width contributes to the hash in addition to the word contents.
static stable_hash hashAPInt(const APInt &I) {
6969
SmallVector<stable_hash> Hashes;
7070
Hashes.emplace_back(I.getBitWidth());
7171
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
7272
Hashes.append(RawVals.begin(), RawVals.end());
7373
return stable_hash_combine(Hashes);
7474
}
7575

76-
stable_hash hashAPFloat(const APFloat &F) {
76+
/// Returns a stable hash of the floating-point value \p F by hashing the
/// APInt obtained from bitcastToAPInt() with hashAPInt().
static stable_hash hashAPFloat(const APFloat &F) {
7777
return hashAPInt(F.bitcastToAPInt());
7878
}
7979

80-
stable_hash hashGlobalValue(const GlobalValue *GV) {
80+
/// Returns a stable hash of the global variable \p GVar.
///
/// The hash is derived from the initializer rather than the name in two
/// cases: (1) the variable's name starts with ".str" and its initializer is
/// a string, and (2) the variable is placed in one of the sections listed in
/// SectionNames below. In every other case, including a variable without an
/// initializer, the result is whatever hashGlobalValue() returns for it.
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81+
if (!GVar.hasInitializer())
82+
return hashGlobalValue(&GVar);
83+
84+
// For variables whose name starts with ".str", hash the string contents of
// the initializer so that equal strings hash equally regardless of name.
85+
if (GVar.getName().starts_with(".str")) {
86+
auto *C = GVar.getInitializer();
87+
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
88+
if (Seq->isString())
89+
return stable_hash_name(Seq->getAsString());
90+
}
91+
92+
// Hash structural contents of Objective-C metadata in specific sections.
93+
// This can be extended to other metadata if needed. The section string is
// matched by substring (contains), not by equality, so it may carry more
// than the bare section name.
94+
static constexpr const char *SectionNames[] = {
95+
"__cfstring", "__cstring", "__objc_classrefs",
96+
"__objc_methname", "__objc_selrefs",
97+
};
98+
if (GVar.hasSection()) {
99+
StringRef SectionName = GVar.getSection();
100+
for (const char *Name : SectionNames)
101+
if (SectionName.contains(Name))
102+
return hashConstant(GVar.getInitializer());
103+
}
104+
105+
// No content-based rule applied: fall back to hashGlobalValue().
return hashGlobalValue(&GVar);
106+
}
107+
108+
static stable_hash hashGlobalValue(const GlobalValue *GV) {
81109
if (!GV->hasName())
82110
return 0;
83111
return stable_hash_name(GV->getName());
@@ -87,7 +115,7 @@ class StructuralHashImpl {
87115
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
88116
// we're interested in computing a hash rather than comparing two Constants.
89117
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
90-
stable_hash hashConstant(Constant *C) {
118+
static stable_hash hashConstant(const Constant *C) {
91119
SmallVector<stable_hash> Hashes;
92120

93121
Type *Ty = C->getType();
@@ -98,14 +126,21 @@ class StructuralHashImpl {
98126
return stable_hash_combine(Hashes);
99127
}
100128

129+
if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
130+
Hashes.emplace_back(hashGlobalVariable(*GVar));
131+
return stable_hash_combine(Hashes);
132+
}
133+
101134
if (auto *G = dyn_cast<GlobalValue>(C)) {
102135
Hashes.emplace_back(hashGlobalValue(G));
103136
return stable_hash_combine(Hashes);
104137
}
105138

106139
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
107-
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
108-
return stable_hash_combine(Hashes);
140+
if (Seq->isString()) {
141+
Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
142+
return stable_hash_combine(Hashes);
143+
}
109144
}
110145

111146
switch (C->getValueID()) {
@@ -297,6 +332,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
297332
return H.getHash();
298333
}
299334

335+
// Public entry point for hashing a single global variable; forwards directly
// to the static helper StructuralHashImpl::hashGlobalVariable.
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
336+
return StructuralHashImpl::hashGlobalVariable(GVar);
337+
}
338+
300339
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
301340
StructuralHashImpl H(DetailedHash);
302341
H.update(M);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; This test verifies that global variables (NSConstantString objects) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
10+
%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
11+
@__CFConstantStringClassReference = external global [0 x i32]
12+
@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
13+
@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
14+
15+
@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
16+
@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
17+
18+
declare i32 @hoo(ptr noundef)
19+
20+
define i32 @f1() {
21+
entry:
22+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
23+
%add = sub nsw i32 %call, 1
24+
ret i32 %add
25+
}
26+
27+
define i32 @f2() {
28+
entry:
29+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
30+
%add = sub nsw i32 %call, 1
31+
ret i32 %add
32+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; This test verifies that global variables (objc metadata) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
10+
%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
11+
12+
@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
13+
@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
14+
@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
15+
16+
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
17+
@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
18+
19+
@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
20+
@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
21+
22+
declare ptr @objc_msgSend(ptr, ptr, ...)
23+
24+
define i32 @f1() {
25+
entry:
26+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
27+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
28+
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
29+
ret i32 %call
30+
}
31+
32+
define i32 @f2() {
33+
entry:
34+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
35+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
36+
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
37+
ret i32 %call
38+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; This test verifies that global variables (string) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
; CHECK-NOT: _f3.Tgm
10+
; CHECK-NOT: _f4.Tgm
11+
12+
; The initial contents of `.str` and `.str.1` are identical, but they differ from those of `.str.2` and `.str.3`.
13+
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
14+
@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
15+
@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1
16+
@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1
17+
18+
declare i32 @goo(ptr noundef)
19+
20+
define i32 @f1() {
21+
entry:
22+
%call = tail call i32 @goo(ptr noundef nonnull @.str)
23+
%add = add nsw i32 %call, 1
24+
ret i32 %add
25+
}
26+
27+
define i32 @f2() {
28+
entry:
29+
%call = tail call i32 @goo(ptr noundef nonnull @.str.1)
30+
%add = add nsw i32 %call, 1
31+
ret i32 %add
32+
}
33+
34+
define i32 @f3() {
35+
entry:
36+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2)
37+
%add = sub nsw i32 %call, 1
38+
ret i32 %add
39+
}
40+
41+
define i32 @f4() {
42+
entry:
43+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3)
44+
%add = sub nsw i32 %call, 1
45+
ret i32 %add
46+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; This test verifies that global variables are hashed based on their initial contents,
2+
; allowing them to be outlined even if they appear different due to their names.
3+
4+
; RUN: split-file %s %t
5+
6+
; The outlined function is created locally.
7+
; Note that `.str.3` is used by both `f1()` and `f2()`.
8+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
9+
; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE
10+
11+
; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}:
12+
; WRITE: adrp x1, l_.str.3
13+
; WRITE-NEXT: add x1, x1, l_.str.3
14+
; WRITE-NEXT: mov w2
15+
; WRITE-NEXT: mov w3
16+
; WRITE-NEXT: mov w4
17+
; WRITE-NEXT: b
18+
19+
; Create an object file and merge it into the cgdata.
20+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
21+
; RUN: -filetype=obj %t/local-two.ll -o %t_write_base
22+
; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
23+
24+
; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
25+
; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata.
26+
27+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \
28+
; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ
29+
30+
; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}:
31+
; READ: adrp x1, l_.str.5
32+
; READ-NEXT: add x1, x1, l_.str.5
33+
; READ-NEXT: mov w2
34+
; READ-NEXT: mov w3
35+
; READ-NEXT: mov w4
36+
; READ-NEXT: b
37+
38+
;--- local-two.ll
39+
@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1
40+
@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1
41+
@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
42+
43+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
44+
define i32 @f1() minsize {
45+
entry:
46+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
47+
ret i32 %call
48+
}
49+
define i32 @f2() minsize {
50+
entry:
51+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
52+
ret i32 %call
53+
}
54+
55+
;--- local-one.ll
56+
@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
57+
@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
58+
59+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
60+
define i32 @f1() minsize {
61+
entry:
62+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3)
63+
ret i32 %call
64+
}

0 commit comments

Comments
 (0)