Skip to content

Commit a13bc97

Browse files
[BOLT][AArch64] Implement PLTCall optimization (#93584)
`convertCallToIndirectCall` applies the PLTCall optimization and returns an (updated if needed) iterator to the converted call instruction. Since AArch64 requires injecting additional instructions to implement this pass, the relevant BasicBlock and an iterator are passed to `convertCallToIndirectCall`. `NumCallsOptimized` is updated only on successful application of the pass. Tests: - Inputs/plt-tailcall.c: an example of a tail-call-optimized PLT call. - AArch64/plt-call.test: the actual A64 test, which runs the PLTCall optimization on the above input file and verifies that the pass is applied to the calls to 'printf' and 'puts'.
1 parent ca63860 commit a13bc97

File tree

7 files changed

+104
-19
lines changed

7 files changed

+104
-19
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,13 +1412,14 @@ class MCPlusBuilder {
14121412
return false;
14131413
}
14141414

1415-
/// Modify a direct call instruction \p Inst with an indirect call taking
1416-
/// a destination from a memory location pointed by \p TargetLocation symbol.
1417-
virtual bool convertCallToIndirectCall(MCInst &Inst,
1418-
const MCSymbol *TargetLocation,
1419-
MCContext *Ctx) {
1415+
/// Creates an indirect call to the function within the \p DirectCall PLT
1416+
/// stub. The function's memory location is pointed to by the \p TargetLocation
1417+
/// symbol.
1418+
virtual InstructionListType
1419+
createIndirectPltCall(const MCInst &DirectCall,
1420+
const MCSymbol *TargetLocation, MCContext *Ctx) {
14201421
llvm_unreachable("not implemented");
1421-
return false;
1422+
return {};
14221423
}
14231424

14241425
/// Morph an indirect call into a load where \p Reg holds the call target.

bolt/lib/Passes/PLTCall.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
4848
return Error::success();
4949

5050
uint64_t NumCallsOptimized = 0;
51-
for (auto &It : BC.getBinaryFunctions()) {
52-
BinaryFunction &Function = It.second;
51+
for (auto &BFI : BC.getBinaryFunctions()) {
52+
BinaryFunction &Function = BFI.second;
5353
if (!shouldOptimize(Function))
5454
continue;
5555

@@ -61,18 +61,21 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
6161
if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount())
6262
continue;
6363

64-
for (MCInst &Instr : BB) {
65-
if (!BC.MIB->isCall(Instr))
64+
for (auto II = BB.begin(); II != BB.end(); II++) {
65+
if (!BC.MIB->isCall(*II))
6666
continue;
67-
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
67+
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*II);
6868
if (!CallSymbol)
6969
continue;
7070
const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
7171
if (!CalleeBF || !CalleeBF->isPLTFunction())
7272
continue;
73-
BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(),
74-
BC.Ctx.get());
75-
BC.MIB->addAnnotation(Instr, "PLTCall", true);
73+
const InstructionListType NewCode = BC.MIB->createIndirectPltCall(
74+
*II, CalleeBF->getPLTSymbol(), BC.Ctx.get());
75+
II = BB.replaceInstruction(II, NewCode);
76+
assert(!NewCode.empty() && "PLT Call replacement must be non-empty");
77+
std::advance(II, NewCode.size() - 1);
78+
BC.MIB->addAnnotation(*II, "PLTCall", true);
7679
++NumCallsOptimized;
7780
}
7881
}

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,6 +1054,47 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
10541054
return true;
10551055
}
10561056

1057+
InstructionListType createIndirectPltCall(const MCInst &DirectCall,
1058+
const MCSymbol *TargetLocation,
1059+
MCContext *Ctx) override {
1060+
const bool IsTailCall = isTailCall(DirectCall);
1061+
assert((DirectCall.getOpcode() == AArch64::BL ||
1062+
(DirectCall.getOpcode() == AArch64::B && IsTailCall)) &&
1063+
"64-bit direct (tail) call instruction expected");
1064+
1065+
InstructionListType Code;
1066+
// Code sequence for indirect plt call:
1067+
// adrp x16 <symbol>
1068+
// ldr x17, [x16, #<offset>]
1069+
// blr x17 ; or 'br' for tail calls
1070+
1071+
MCInst InstAdrp;
1072+
InstAdrp.setOpcode(AArch64::ADRP);
1073+
InstAdrp.addOperand(MCOperand::createReg(AArch64::X16));
1074+
InstAdrp.addOperand(MCOperand::createImm(0));
1075+
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation,
1076+
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
1077+
Code.emplace_back(InstAdrp);
1078+
1079+
MCInst InstLoad;
1080+
InstLoad.setOpcode(AArch64::LDRXui);
1081+
InstLoad.addOperand(MCOperand::createReg(AArch64::X17));
1082+
InstLoad.addOperand(MCOperand::createReg(AArch64::X16));
1083+
InstLoad.addOperand(MCOperand::createImm(0));
1084+
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation,
1085+
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
1086+
Code.emplace_back(InstLoad);
1087+
1088+
MCInst InstCall;
1089+
InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
1090+
InstCall.addOperand(MCOperand::createReg(AArch64::X17));
1091+
if (IsTailCall)
1092+
setTailCall(InstCall);
1093+
Code.emplace_back(InstCall);
1094+
1095+
return Code;
1096+
}
1097+
10571098
bool lowerTailCall(MCInst &Inst) override {
10581099
removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
10591100
if (getConditionalTailCall(Inst))

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,11 +1639,16 @@ class X86MCPlusBuilder : public MCPlusBuilder {
16391639
return true;
16401640
}
16411641

1642-
bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation,
1643-
MCContext *Ctx) override {
1644-
assert((Inst.getOpcode() == X86::CALL64pcrel32 ||
1645-
(Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) &&
1642+
InstructionListType createIndirectPltCall(const MCInst &DirectCall,
1643+
const MCSymbol *TargetLocation,
1644+
MCContext *Ctx) override {
1645+
assert((DirectCall.getOpcode() == X86::CALL64pcrel32 ||
1646+
(DirectCall.getOpcode() == X86::JMP_4 && isTailCall(DirectCall))) &&
16461647
"64-bit direct (tail) call instruction expected");
1648+
1649+
InstructionListType Code;
1650+
// Create a new indirect call by converting the previous direct call.
1651+
MCInst Inst = DirectCall;
16471652
const auto NewOpcode =
16481653
(Inst.getOpcode() == X86::CALL64pcrel32) ? X86::CALL64m : X86::JMP32m;
16491654
Inst.setOpcode(NewOpcode);
@@ -1664,7 +1669,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
16641669
Inst.insert(Inst.begin(),
16651670
MCOperand::createReg(X86::RIP)); // BaseReg
16661671

1667-
return true;
1672+
Code.emplace_back(Inst);
1673+
return Code;
16681674
}
16691675

16701676
void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override {

bolt/test/AArch64/plt-call.test

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Verify that PLTCall optimization works.
2+
3+
RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
4+
RUN: -o %t -Wl,-q
5+
RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
6+
7+
// Call to printf
8+
CHECK: adrp x16, printf@GOT
9+
CHECK: ldr x17, [x16, :lo12:printf@GOT]
10+
CHECK: blr x17 # PLTCall: 1
11+
12+
// Call to puts, that was tail-call optimized
13+
CHECK: adrp x16, puts@GOT
14+
CHECK: ldr x17, [x16, :lo12:puts@GOT]
15+
CHECK: br x17 # TAILCALL # PLTCall: 1

bolt/test/Inputs/plt-tailcall.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include "stub.h"
2+
3+
int foo(char *c) {
4+
printf("");
5+
__attribute__((musttail)) return puts(c);
6+
}
7+
8+
int main() { return foo("a"); }

bolt/test/X86/plt-call.test

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Verify that PLTCall optimization works.
2+
3+
RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
4+
RUN: -o %t -Wl,-q
5+
RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
6+
7+
// Call to printf
8+
CHECK: callq *printf@GOT(%rip) # PLTCall: 1
9+
10+
// Call to puts, that was tail-call optimized
11+
CHECK: jmpl *puts@GOT(%rip) # TAILCALL # PLTCall: 1

0 commit comments

Comments
 (0)