-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[StructuralHash] Global Variable #118412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[StructuralHash] Global Variable #118412
Changes from all commits
fb58d52
b08aab8
58f729a
d8d6cb7
c00f963
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,7 +46,7 @@ class StructuralHashImpl { | |
/// Assign a unique ID to each Value in the order they are first seen. | ||
DenseMap<const Value *, int> ValueToId; | ||
|
||
stable_hash hashType(Type *ValueType) { | ||
static stable_hash hashType(Type *ValueType) { | ||
SmallVector<stable_hash> Hashes; | ||
Hashes.emplace_back(ValueType->getTypeID()); | ||
if (ValueType->isIntegerTy()) | ||
|
@@ -65,19 +65,47 @@ class StructuralHashImpl { | |
} | ||
} | ||
|
||
stable_hash hashAPInt(const APInt &I) { | ||
static stable_hash hashAPInt(const APInt &I) { | ||
SmallVector<stable_hash> Hashes; | ||
Hashes.emplace_back(I.getBitWidth()); | ||
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); | ||
Hashes.append(RawVals.begin(), RawVals.end()); | ||
return stable_hash_combine(Hashes); | ||
} | ||
|
||
stable_hash hashAPFloat(const APFloat &F) { | ||
static stable_hash hashAPFloat(const APFloat &F) { | ||
return hashAPInt(F.bitcastToAPInt()); | ||
} | ||
|
||
stable_hash hashGlobalValue(const GlobalValue *GV) { | ||
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { | ||
if (!GVar.hasInitializer()) | ||
return hashGlobalValue(&GVar); | ||
|
||
// Hash the contents of a string. | ||
if (GVar.getName().starts_with(".str")) { | ||
auto *C = GVar.getInitializer(); | ||
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) | ||
if (Seq->isString()) | ||
return stable_hash_name(Seq->getAsString()); | ||
} | ||
|
||
// Hash structural contents of Objective-C metadata in specific sections. | ||
// This can be extended to other metadata if needed. | ||
static constexpr const char *SectionNames[] = { | ||
"__cfstring", "__cstring", "__objc_classrefs", | ||
"__objc_methname", "__objc_selrefs", | ||
}; | ||
if (GVar.hasSection()) { | ||
StringRef SectionName = GVar.getSection(); | ||
for (const char *Name : SectionNames) | ||
if (SectionName.contains(Name)) | ||
ellishg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return hashConstant(GVar.getInitializer()); | ||
} | ||
|
||
return hashGlobalValue(&GVar); | ||
} | ||
|
||
static stable_hash hashGlobalValue(const GlobalValue *GV) { | ||
if (!GV->hasName()) | ||
return 0; | ||
return stable_hash_name(GV->getName()); | ||
|
@@ -87,7 +115,7 @@ class StructuralHashImpl { | |
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here | ||
// we're interested in computing a hash rather than comparing two Constants. | ||
// Some of the logic is simplified, e.g, we don't expand GEPOperator. | ||
stable_hash hashConstant(Constant *C) { | ||
static stable_hash hashConstant(const Constant *C) { | ||
SmallVector<stable_hash> Hashes; | ||
|
||
Type *Ty = C->getType(); | ||
|
@@ -98,14 +126,21 @@ class StructuralHashImpl { | |
return stable_hash_combine(Hashes); | ||
} | ||
|
||
if (auto *GVar = dyn_cast<GlobalVariable>(C)) { | ||
Hashes.emplace_back(hashGlobalVariable(*GVar)); | ||
return stable_hash_combine(Hashes); | ||
} | ||
|
||
if (auto *G = dyn_cast<GlobalValue>(C)) { | ||
Hashes.emplace_back(hashGlobalValue(G)); | ||
return stable_hash_combine(Hashes); | ||
} | ||
|
||
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { | ||
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); | ||
return stable_hash_combine(Hashes); | ||
if (Seq->isString()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about other There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you see below in the switch statement, |
||
Hashes.emplace_back(stable_hash_name(Seq->getAsString())); | ||
return stable_hash_combine(Hashes); | ||
} | ||
} | ||
|
||
switch (C->getValueID()) { | ||
|
@@ -266,6 +301,7 @@ class StructuralHashImpl { | |
Hashes.emplace_back(Hash); | ||
Hashes.emplace_back(GlobalHeaderHash); | ||
Hashes.emplace_back(GV.getValueType()->getTypeID()); | ||
Hashes.emplace_back(hashGlobalVariable(GV)); | ||
|
||
// Update the combined hash in place. | ||
Hash = stable_hash_combine(Hashes); | ||
|
@@ -297,6 +333,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { | |
return H.getHash(); | ||
} | ||
|
||
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { | ||
return StructuralHashImpl::hashGlobalVariable(GVar); | ||
} | ||
|
||
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { | ||
StructuralHashImpl H(DetailedHash); | ||
H.update(M); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
; This test verifies that global variables (ns constant) are hashed based on their initial contents, | ||
; allowing them to be merged even if they appear different due to their names. | ||
; Now they become identical functions that can be merged without creating a parameter. | ||
|
||
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s | ||
|
||
; CHECK: _f1.Tgm | ||
; CHECK: _f2.Tgm | ||
|
||
%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } | ||
@__CFConstantStringClassReference = external global [0 x i32] | ||
@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 | ||
@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 | ||
|
||
@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 | ||
@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 | ||
|
||
declare i32 @hoo(ptr noundef) | ||
|
||
define i32 @f1() { | ||
entry: | ||
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) | ||
%add = sub nsw i32 %call, 1 | ||
ret i32 %add | ||
} | ||
|
||
define i32 @f2() { | ||
entry: | ||
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2) | ||
%add = sub nsw i32 %call, 1 | ||
ret i32 %add | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
; This test verifies that global variables (objc metadata) are hashed based on their initial contents, | ||
; allowing them to be merged even if they appear different due to their names. | ||
; Now they become identical functions that can be merged without creating a parameter. | ||
|
||
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s | ||
|
||
; CHECK: _f1.Tgm | ||
; CHECK: _f2.Tgm | ||
|
||
%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } | ||
|
||
@"OBJC_CLASS_$_MyClass" = external global %struct._class_t | ||
@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 | ||
@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 | ||
|
||
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 | ||
@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 | ||
|
||
@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 | ||
@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 | ||
|
||
declare ptr @objc_msgSend(ptr, ptr, ...) | ||
|
||
define i32 @f1() { | ||
entry: | ||
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 | ||
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 | ||
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) | ||
ret i32 %call | ||
} | ||
|
||
define i32 @f2() { | ||
entry: | ||
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 | ||
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 | ||
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) | ||
ret i32 %call | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
; This test verifies that global variables (string) are hashed based on their initial contents, | ||
; allowing them to be merged even if they appear different due to their names. | ||
; Now they become identical functions that can be merged without creating a parameter. | ||
|
||
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s | ||
|
||
; CHECK: _f1.Tgm | ||
; CHECK: _f2.Tgm | ||
; CHECK-NOT: _f3.Tgm | ||
; CHECK-NOT: _f4.Tgm | ||
|
||
; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`. | ||
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 | ||
@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 | ||
@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1 | ||
@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1 | ||
|
||
declare i32 @goo(ptr noundef) | ||
|
||
define i32 @f1() { | ||
entry: | ||
%call = tail call i32 @goo(ptr noundef nonnull @.str) | ||
%add = add nsw i32 %call, 1 | ||
ret i32 %add | ||
} | ||
|
||
define i32 @f2() { | ||
entry: | ||
%call = tail call i32 @goo(ptr noundef nonnull @.str.1) | ||
%add = add nsw i32 %call, 1 | ||
ret i32 %add | ||
} | ||
|
||
define i32 @f3() { | ||
entry: | ||
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2) | ||
%add = sub nsw i32 %call, 1 | ||
ret i32 %add | ||
} | ||
|
||
define i32 @f4() { | ||
entry: | ||
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3) | ||
%add = sub nsw i32 %call, 1 | ||
ret i32 %add | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
; This test verifies that global variables are hashed based on their initial contents, | ||
; allowing them to be outlined even if they appear different due to their names. | ||
|
||
; RUN: split-file %s %t | ||
|
||
; The outlined function is created locally. | ||
; Note that `.str.3` is commonly used in both `f1()` and `f2()`. | ||
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ | ||
; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE | ||
|
||
; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}: | ||
; WRITE: adrp x1, l_.str.3 | ||
; WRITE-NEXT: add x1, x1, l_.str.3 | ||
; WRITE-NEXT: mov w2 | ||
; WRITE-NEXT: mov w3 | ||
; WRITE-NEXT: mov w4 | ||
; WRITE-NEXT: b | ||
|
||
; Create an object file and merge it into the cgdata. | ||
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ | ||
; RUN: -filetype=obj %t/local-two.ll -o %t_write_base | ||
; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base | ||
|
||
; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. | ||
; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata. | ||
|
||
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \ | ||
; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ | ||
|
||
; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}: | ||
; READ: adrp x1, l_.str.5 | ||
; READ-NEXT: add x1, x1, l_.str.5 | ||
; READ-NEXT: mov w2 | ||
; READ-NEXT: mov w3 | ||
; READ-NEXT: mov w4 | ||
; READ-NEXT: b | ||
|
||
;--- local-two.ll | ||
@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 | ||
@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 | ||
@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 | ||
|
||
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) | ||
define i32 @f1() minsize { | ||
entry: | ||
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) | ||
ret i32 %call | ||
} | ||
define i32 @f2() minsize { | ||
entry: | ||
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) | ||
ret i32 %call | ||
} | ||
|
||
;--- local-one.ll | ||
@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 | ||
@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 | ||
|
||
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) | ||
define i32 @f1() minsize { | ||
entry: | ||
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3) | ||
ret i32 %call | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like we can generate a meaningful hash for globals regardless of whether they have a name. Should we move the `hasName` check after this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved the `hasName` check below.