Skip to content

Commit 206b4c9

Browse files
[BOLT][AArch64] Implement PLTCall optimization
`convertCallToIndirectCall` applies the PLTCall optimization and, if needed, updates the iterator it is given so that it points to the converted call instruction; it returns true on success. Since AArch64 needs to inject additional instructions to implement this pass, the relevant BasicBlock and an iterator are passed to `convertCallToIndirectCall`. `NumCallsOptimized` is updated only on successful application of the pass. Tests: - Inputs/plt-tailcall.c: an example of a tail-call-optimized PLT call. - AArch64/plt-call.test: the actual AArch64 test, which runs the PLTCall optimization on the above input file and verifies that the pass is applied to the calls to 'printf' and 'puts'.
1 parent 3bde798 commit 206b4c9

File tree

6 files changed

+95
-11
lines changed

6 files changed

+95
-11
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#ifndef BOLT_CORE_MCPLUSBUILDER_H
1515
#define BOLT_CORE_MCPLUSBUILDER_H
1616

17+
#include "bolt/Core/BinaryBasicBlock.h"
1718
#include "bolt/Core/MCPlus.h"
1819
#include "bolt/Core/Relocation.h"
1920
#include "llvm/ADT/ArrayRef.h"
@@ -1412,9 +1413,15 @@ class MCPlusBuilder {
14121413
return false;
14131414
}
14141415

1415-
/// Modify a direct call instruction \p Inst with an indirect call taking
1416-
/// a destination from a memory location pointed by \p TargetLocation symbol.
1417-
virtual bool convertCallToIndirectCall(MCInst &Inst,
1416+
/// Modify a direct call instruction pointed to by the iterator \p It, with an
1417+
/// indirect call taking a destination from a memory location pointed to by \p
1418+
/// TargetLocation symbol. If additional instructions need to be prepended
1419+
/// before \p It, then the iterator must be updated to point to the indirect
1420+
/// call instruction.
1421+
///
1422+
/// \return true on success
1423+
virtual bool convertCallToIndirectCall(BinaryBasicBlock &BB,
1424+
BinaryBasicBlock::iterator &It,
14181425
const MCSymbol *TargetLocation,
14191426
MCContext *Ctx) {
14201427
llvm_unreachable("not implemented");

bolt/lib/Passes/PLTCall.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,23 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
6161
if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount())
6262
continue;
6363

64-
for (MCInst &Instr : BB) {
65-
if (!BC.MIB->isCall(Instr))
64+
for (auto It = BB.begin(); It != BB.end(); It++) {
65+
if (!BC.MIB->isCall(*It))
6666
continue;
67-
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
67+
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*It);
6868
if (!CallSymbol)
6969
continue;
7070
const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
7171
if (!CalleeBF || !CalleeBF->isPLTFunction())
7272
continue;
73-
BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(),
74-
BC.Ctx.get());
75-
BC.MIB->addAnnotation(Instr, "PLTCall", true);
76-
++NumCallsOptimized;
73+
if (BC.MIB->convertCallToIndirectCall(BB, It, CalleeBF->getPLTSymbol(),
74+
BC.Ctx.get())) {
75+
assert(BC.MIB->isCall(*It) &&
76+
"Iterator must point to the optimized call");
77+
78+
BC.MIB->addAnnotation(*It, "PLTCall", true);
79+
++NumCallsOptimized;
80+
}
7781
}
7882
}
7983
}

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,52 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
10551055
return true;
10561056
}
10571057

1058+
bool convertCallToIndirectCall(BinaryBasicBlock &BB,
1059+
BinaryBasicBlock::iterator &It,
1060+
const MCSymbol *TargetLocation,
1061+
MCContext *Ctx) override {
1062+
// Generated code:
1063+
// adrp x16 <symbol>
1064+
// ldr x17, [x16, #<offset>]
1065+
// bl <label> -> blr x17 (or convert 'b -> br' for tail calls)
1066+
1067+
MCInst &InstCall = *It;
1068+
bool IsTailCall = isTailCall(InstCall);
1069+
assert((InstCall.getOpcode() == AArch64::BL ||
1070+
(InstCall.getOpcode() == AArch64::B && IsTailCall)) &&
1071+
"64-bit direct (tail) call instruction expected");
1072+
1073+
// Convert the call to an indirect one by modifying the instruction.
1074+
InstCall.clear();
1075+
InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
1076+
InstCall.addOperand(MCOperand::createReg(AArch64::X17));
1077+
if (IsTailCall)
1078+
setTailCall(*It);
1079+
1080+
// Prepend instructions to load PLT call address from the input symbol.
1081+
1082+
MCInst InstLoad;
1083+
InstLoad.setOpcode(AArch64::LDRXui);
1084+
InstLoad.addOperand(MCOperand::createReg(AArch64::X17));
1085+
InstLoad.addOperand(MCOperand::createReg(AArch64::X16));
1086+
InstLoad.addOperand(MCOperand::createImm(0));
1087+
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation,
1088+
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
1089+
It = BB.insertInstruction(It, InstLoad);
1090+
1091+
MCInst InstAdrp;
1092+
InstAdrp.setOpcode(AArch64::ADRP);
1093+
InstAdrp.clear();
1094+
InstAdrp.addOperand(MCOperand::createReg(AArch64::X16));
1095+
InstAdrp.addOperand(MCOperand::createImm(0));
1096+
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation,
1097+
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
1098+
It = BB.insertInstruction(It, InstAdrp);
1099+
1100+
It = It + 2;
1101+
return true;
1102+
}
1103+
10581104
bool lowerTailCall(MCInst &Inst) override {
10591105
removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
10601106
if (getConditionalTailCall(Inst))

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1639,8 +1639,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
16391639
return true;
16401640
}
16411641

1642-
bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation,
1642+
bool convertCallToIndirectCall(BinaryBasicBlock &BB,
1643+
BinaryBasicBlock::iterator &It,
1644+
const MCSymbol *TargetLocation,
16431645
MCContext *Ctx) override {
1646+
MCInst &Inst = (*It);
16441647
assert((Inst.getOpcode() == X86::CALL64pcrel32 ||
16451648
(Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) &&
16461649
"64-bit direct (tail) call instruction expected");

bolt/test/AArch64/plt-call.test

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Verify that PLTCall optimization works, including when PLT calls were
2+
// tail-call optimized.
3+
4+
RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
5+
RUN: -o %t -Wl,-q
6+
RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
7+
8+
// Call to printf
9+
CHECK: adrp x16, printf@GOT
10+
CHECK: ldr x17, [x16, :lo12:printf@GOT]
11+
CHECK: blr x17 # PLTCall: 1
12+
13+
// Call to puts, that was tail-call optimized
14+
CHECK: adrp x16, puts@GOT
15+
CHECK: ldr x17, [x16, :lo12:puts@GOT]
16+
CHECK: br x17 # TAILCALL # PLTCall: 1

bolt/test/Inputs/plt-tailcall.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include "stub.h"
2+
3+
int foo(char *c) {
4+
printf("");
5+
__attribute__((musttail)) return puts(c);
6+
}
7+
8+
int main() { return foo("a"); }

0 commit comments

Comments
 (0)