-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[WORKAROUND][BOLT][AArch64] Static binary patching for ELF. #97710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WORKAROUND][BOLT][AArch64] Static binary patching for ELF. #97710
Conversation
@llvm/pr-subscribers-bolt Author: Paschalis Mpeis (paschalis-mpeis) ChangesDRAFT / WIP: Work In Progressstatic binaries fix: don't patch anything that is not in orig text segment Full diff: https://github.com/llvm/llvm-project/pull/97710.diff 3 Files Affected:
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b..12464493bcbb4 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5291,6 +5291,19 @@ void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
GOTContents.size());
++GOTEntry) {
if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
+ auto *Function = BC->getBinaryFunctionAtAddress(*GOTEntry);
+
+ // On static binaries, avoid patching got entries that did not belong to
+ // the original text section. One such special case is the '_init'
+ // function, belonging to the '.init' section.
+ if (BC->IsStaticExecutable &&
+ Function->getOriginSectionName() != ".bolt.org.text") {
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring GOT entry 0x"
+ << Twine::utohexstr(*GOTEntry) << " for '"
+ << Function->getOneName() << "'" << '\n');
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
<< Twine::utohexstr(*GOTEntry) << " with 0x"
<< Twine::utohexstr(NewAddress) << '\n');
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a74eda8e4a566..c5af2d4d56888 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -706,8 +706,20 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension);
AArch64_AM::ShiftExtendType ExtendType =
AArch64_AM::getArithExtendType(OperandExtension);
- if (ShiftVal != 2)
- llvm_unreachable("Failed to match indirect branch! (fragment 2)");
+ if (ShiftVal != 2) {
+ // TODO: Handle the patten where ShiftVal != 2.
+ // The following code sequence below has no shift amount,
+ // the range could be 0 to 4.
+ // The pattern comes from libc, it occurs when the binary is static.
+ // adr x6, 0x219fb0 <sigall_set+0x88>
+ // add x6, x6, x14, lsl #2
+ // ldr w7, [x6]
+ // add x6, x6, w7, sxtw => no shift amount
+ // br x6
+ errs() << "BOLT-WARNING: "
+ "Failed to match indirect branch: ShiftVAL != 2 \n";
+ return false;
+ }
if (ExtendType == AArch64_AM::SXTB)
ScaleValue = 1LL;
@@ -752,6 +764,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return true;
}
+ if (DefJTBaseAdd->getOpcode() == AArch64::ADR) {
+ // TODO: Handle the pattern where there is no adrp/add pair.
+ // It also occurs when the binary is static.
+ // adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
+ // ldrh w13, [x13, w12, uxtw #1]
+ // adr x12, 0x247b30 <__gettextparse+0x5b0>
+ // add x13, x12, w13, sxth #2
+ // br x13
+ errs() << "BOLT-WARNING: Failed to match indirect branch: "
+ "nop/adr instead of adrp/add \n";
+ return false;
+ }
+
assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
"Failed to match jump table base address pattern! (1)");
diff --git a/bolt/test/AArch64/test-indirect-branch.s b/bolt/test/AArch64/test-indirect-branch.s
new file mode 100644
index 0000000000000..cb9e325654774
--- /dev/null
+++ b/bolt/test/AArch64/test-indirect-branch.s
@@ -0,0 +1,110 @@
+// Test how BOLT handles indirect branch sequence of instructions in
+// AArch64MCPlus builder.
+// This test checks the pattern where there is no shift amount after add
+// instruction. The pattern come from libc, it can be reproduced with
+// a 'static' built binary.
+//
+// adr x6, 0x219fb0 <sigall_set+0x88>
+// add x6, x6, x14, lsl #2
+// ldr w7, [x6]
+// add x6, x6, w7, sxtw => no shift amount
+// br x6
+//
+// It also tests another case where there is no adrp/add pair.
+// The pattern also come from libc, and it only represents in the binary
+// if the lld linker is used to create the static binary.
+// It doesn't occur with GCC ld linker.
+//
+// nop => nop/adr instead of adrp/add
+// adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
+// ldrh w13, [x13, w12, uxtw #1]
+// adr x12, 0x247b30 <__gettextparse+0x5b0>
+// add x13, x12, w13, sxth #2
+// br x13
+
+// clang-format off
+
+// REQUIRES: system-linux
+// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+// RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \
+// RUN: -v=1 2>&1 | FileCheck %s
+
+// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
+// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
+
+
+ .section .text
+ .align 4
+ .globl _start
+ .type _start, %function
+_start:
+ bl bar
+ bl end
+ mov x0, #4
+ mov w8, #93
+ svc #0
+
+bar:
+ mov w1, #3
+ cmp x1, #0
+ b.eq end
+ nop
+ adr x3, jump_table
+ ldrh w3, [x3, x1, lsl #1]
+ adr x1, .case0
+ add x3, x1, w3, sxth #2
+ br x3
+.case0:
+ mov w0, #1
+ ret
+.case1:
+ mov w0, #2
+ ret
+.case3:
+ mov w0, #3
+ ret
+.case4:
+ nop
+ mov x1, #0
+ adr x3, datatable
+ add x3, x3, x1, lsl #2
+ ldr w2, [x3]
+ add x3, x3, w2, sxtw
+ br x3
+ nop
+ mov w0, #4
+ ret
+.case7:
+ mov w0, #4
+ ret
+
+foo1:
+ ret
+
+foo2:
+ add w0, w0, #3
+ ret
+
+foo3:
+ add w0, w0, #3
+ ret
+
+end:
+ add x0, x0, #99
+ ret
+
+ .section .rodata,"a",@progbits
+jump_table:
+ .hword (.case0-.case0)>>2
+ .hword (.case1-.case0)>>2
+ .hword (.case3-.case0)>>2
+ .hword (.case4-.case0)>>2
+ .hword (.case7-.case0)>>2
+
+
+datatable:
+ .word foo1-datatable
+ .word foo2-datatable
+ .word foo3-datatable
+ .word 20
|
c79abd9
to
ec7f768
Compare
Minimal Reproducer:main.c: int main() { return 0; } clang main.c -Wl,-q -static -o out
llvm-bolt out -o out.bolt
./out.bolt |
19e7cb3
to
fa4ded3
Compare
As I see, this branch has not been rebased on top of the recently merged changes with the introduction of the Please, rebase the branch to distinguish the correct set of changes, what makes the review much simpler. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thank you. Also thanks for the test. I personally have a one question related to the test but I'm sure this should not pause the progress.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately, I'm not familiar with the -static
option and didn't manage to find what exactly it means. Why this option is required here, maybe it applies by default? I see BC->IsStaticExecutable
in the check you introduced, do they, the option, and the variable's value related? Is IsStaticExecutable
set to true
for binaries statically linked with the glibc?
Thank you.
Hi, I've filed #100096 to try and capture in detail the issue that this PR is trying to solve. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hello, please rebase the patch for further review
// the '_init' function that belongs to the '.init' section. It has | ||
// '.init' as an alias, which points to the same address of '_init' in the | ||
// original binary. | ||
if (BC->IsStaticExecutable && !Function->Aliases.empty() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This way the function entry won't be patched either I think? It might also be a problem
Executing such a binary would result in: > Unexpected reloc type in static binary. > [1] 1234 abort (core dumped)
When patching statically linked binaries, avoid patching GOT entries that did not belong to the original text section and had an alias. One such special case is the '_init' function that belongs to the '.init' section. It has '.init' as an alias, which points to the same address of '_init' in the original binary. This was observed with GNU linker. BOLT normally rejects these cases. See issue: #100096
ec7f768
to
fe4d176
Compare
In the issue below, it was reached a consensus that BOLT can reject such cases, therefore I am closing this PR. I rebased it (as it was a stacked PR) and added some comments in case someone has such a binary and needs a workaround or a starting point. |
When patching statically linked binaries, avoid patching GOT entries
that did not belong to the original text section and had an alias.
One such special case is the '_init' function that belongs to the '.init'
section. It has '.init' as an alias, which points to the same address of
'_init' in the original binary.
This was observed with GNU linker. BOLT normally rejects these cases.
See issue: