-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[BOLT] Store FileSymRefs in a multimap #98992
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Store FileSymRefs in a multimap #98992
Conversation
Created using spr 1.3.4
Created using spr 1.3.4
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) ChangesWith aggressive ICF, it's possible to have different local symbols FileSymRefs only keeps a single SymbolRef per address, which prevents Work around this issue by switching FileSymRefs to a multimap. In Test Plan: added ambiguous_fragment.test Full diff: https://github.com/llvm/llvm-project/pull/98992.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index af1d9b4b70a3d..16a82d5687de9 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,7 +490,7 @@ class RewriteInstance {
std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;
/// Store all non-zero symbols in this map for a quick address lookup.
- std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
+ std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;
/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ded2f577237fe..6815bae47a007 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -886,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
if (SymName == "__hot_start" || SymName == "__hot_end")
continue;
- FileSymRefs[SymbolAddress] = Symbol;
+ FileSymRefs.emplace(SymbolAddress, Symbol);
// Skip section symbols that will be registered by disassemblePLT().
if (SymbolType == SymbolRef::ST_Debug) {
@@ -1052,7 +1052,9 @@ void RewriteInstance::discoverFileObjects() {
// Remove the symbol from FileSymRefs so that we can skip it from
// in the future.
- auto SI = FileSymRefs.find(SymbolAddress);
+ auto SI = llvm::find_if(
+ llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
+ [&](auto SymIt) { return SymIt.second == Symbol; });
assert(SI != FileSymRefs.end() && "symbol expected to be present");
assert(SI->second == Symbol && "wrong symbol found");
FileSymRefs.erase(SI);
@@ -1433,7 +1435,11 @@ void RewriteInstance::registerFragments() {
const uint64_t Address = BF->getAddress();
// Get fragment's own symbol
- const auto SymIt = FileSymRefs.find(Address);
+ const auto SymIt = llvm::find_if(
+ llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
+ StringRef Name = cantFail(SI.second.getName());
+ return Name.contains(ParentName);
+ });
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.s b/bolt/test/X86/Inputs/ambiguous_fragment.s
new file mode 100644
index 0000000000000..05346ffd7c344
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.s
@@ -0,0 +1,54 @@
+#--- file1
+.file "file1.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file2
+.file "file2.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file3
+.file "file3.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file4
+.file "file4.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file5
+.file "bolt-pseudo.o"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.script b/bolt/test/X86/Inputs/ambiguous_fragment.script
new file mode 100644
index 0000000000000..00129b8887641
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.script
@@ -0,0 +1,6 @@
+SECTIONS {
+ . = 0x10000;
+ .text : { *(.text) }
+ . = 0x20000;
+ .text.cold : { *(.text.cold) }
+}
diff --git a/bolt/test/X86/ambiguous_fragment.test b/bolt/test/X86/ambiguous_fragment.test
new file mode 100644
index 0000000000000..e8170584bc697
--- /dev/null
+++ b/bolt/test/X86/ambiguous_fragment.test
@@ -0,0 +1,28 @@
+## This reproduces a bug with misidentification of a parent fragment.
+
+RUN: split-file %p/Inputs/ambiguous_fragment.s %t
+
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o
+
+RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
+RUN: --script %p/Inputs/ambiguous_fragment.script
+
+RUN: llvm-objcopy %t.exe %t.exe2 \
+RUN: --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
+RUN: --add-symbol=_Zfunc=.text:0xc,function
+
+RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS
+
+RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
+RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s
+
+# PREAGG: B X:0 #__func# 1 0
+
+CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
+CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
+
+CHECK: BOLT-INFO
|
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Is this map live for the entire execution? Is there a reported memory increase? Thanks!
Yes, currently it is live throughout the execution, even though it can be freed at the end of |
Created using spr 1.3.4
With aggressive ICF, it's possible to have different local symbols
(under different FILE symbols) to be mapped to the same address.
FileSymRefs only keeps a single SymbolRef per address, which prevents
fragment matching from finding the correct symbol to perform parent
function lookup.
Work around this issue by switching FileSymRefs to a multimap. In
future, uses of FileSymRefs can be replaced with SortedSymbols which
keeps essentially the same information.
Test Plan: added ambiguous_fragment.test