Skip to content

Commit 6b61e70

Browse files
aaupov authored and
yuxuanchen1997 committed
[BOLT] Support POSSIBLE_PIC_FIXED_BRANCH
Summary:
Detect and support fixed PIC indirect jumps of the following form:

```
movslq En(%rip), %r1
leaq PIC_JUMP_TABLE(%rip), %r2
addq %r2, %r1
jmpq *%r1
```

with PIC_JUMP_TABLE that looks like the following:

```
JT:  ----------
 E1:| L1 - JT  |
    |----------|
 E2:| L2 - JT  |
    |----------|
    |          |
       ......
 En:| Ln - JT  |
     ----------
```

The code could be produced by compilers, see #91648.

Test Plan: updated jump-table-fixed-ref-pic.test

Reviewers: ayermolo, rafaelauler

Reviewed By: rafaelauler

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D60251366
1 parent 28a3c9a commit 6b61e70

File tree

10 files changed

+153
-59
lines changed

10 files changed

+153
-59
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,9 @@ class BinaryContext {
431431
return nullptr;
432432
}
433433

434+
/// Deregister JumpTable registered at a given \p Address and delete it.
435+
void deleteJumpTable(uint64_t Address);
436+
434437
unsigned getDWARFEncodingSize(unsigned Encoding) {
435438
if (Encoding == dwarf::DW_EH_PE_omit)
436439
return 0;

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ enum class IndirectBranchType : char {
5858
POSSIBLE_PIC_JUMP_TABLE, /// Possibly a jump table for PIC.
5959
POSSIBLE_GOTO, /// Possibly a gcc's computed goto.
6060
POSSIBLE_FIXED_BRANCH, /// Possibly an indirect branch to a fixed location.
61+
POSSIBLE_PIC_FIXED_BRANCH, /// Possibly an indirect jump to a fixed entry in a
62+
/// PIC jump table.
6163
};
6264

6365
class MCPlusBuilder {
@@ -1474,12 +1476,11 @@ class MCPlusBuilder {
14741476
/// will be set to the different components of the branch. \p MemLocInstr
14751477
/// is the instruction that loads up the indirect function pointer. It may
14761478
/// or may not be same as \p Instruction.
1477-
virtual IndirectBranchType
1478-
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
1479-
InstructionIterator End, const unsigned PtrSize,
1480-
MCInst *&MemLocInstr, unsigned &BaseRegNum,
1481-
unsigned &IndexRegNum, int64_t &DispValue,
1482-
const MCExpr *&DispExpr, MCInst *&PCRelBaseOut) const {
1479+
virtual IndirectBranchType analyzeIndirectBranch(
1480+
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
1481+
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
1482+
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
1483+
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const {
14831484
llvm_unreachable("not implemented");
14841485
return IndirectBranchType::UNKNOWN;
14851486
}

bolt/lib/Core/BinaryContext.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
25232523
return nullptr;
25242524
}
25252525

2526+
/// Deregister JumpTable registered at a given \p Address and delete it.
///
/// A jump table must already be registered in this context's JumpTables map
/// under exactly \p Address; this precondition is checked with an assert.
/// The table is unlinked from every function listed in its Parents before
/// the context-level entry is removed and the object is freed.
2527+
void BinaryContext::deleteJumpTable(uint64_t Address) {
2528+
// Precondition: the caller only asks to delete a table that exists.
assert(JumpTables.count(Address) && "Must have a jump table at address");
2529+
JumpTable *JT = JumpTables.at(Address);
2530+
// Drop each parent function's own entry keyed by this address.
for (BinaryFunction *Parent : JT->Parents)
2531+
Parent->JumpTables.erase(Address);
2532+
// Remove the context-level registration, then release the object itself.
JumpTables.erase(Address);
2533+
delete JT;
2534+
}
2535+
25262536
DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
25272537
const DWARFAddressRangesVector &InputRanges) const {
25282538
DebugAddressRangesVector OutputRanges;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
780780
// setting the value of the register used by the branch.
781781
MCInst *MemLocInstr;
782782

783+
// The instruction loading the fixed PIC jump table entry value.
784+
MCInst *FixedEntryLoadInstr;
785+
783786
// Address of the table referenced by MemLocInstr. Could be either an
784787
// array of function pointers, or a jump table.
785788
uint64_t ArrayStart = 0;
@@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
811814

812815
IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
813816
Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
814-
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
817+
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr);
815818

816819
if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
817820
return BranchType;
@@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
877880
if (BaseRegNum == BC.MRI->getProgramCounter())
878881
ArrayStart += getAddress() + Offset + Size;
879882

883+
if (FixedEntryLoadInstr) {
884+
assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH &&
885+
"Invalid IndirectBranch type");
886+
MCInst::iterator FixedEntryDispOperand =
887+
BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr);
888+
assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() &&
889+
"Invalid memory instruction");
890+
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
891+
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
892+
uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
893+
ErrorOr<int64_t> Value =
894+
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
895+
if (!Value)
896+
return IndirectBranchType::UNKNOWN;
897+
898+
BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this
899+
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
900+
<< " referencing data at 0x" << Twine::utohexstr(EntryAddress)
901+
<< " the destination value is 0x"
902+
<< Twine::utohexstr(ArrayStart + *Value) << '\n';
903+
904+
TargetAddress = ArrayStart + *Value;
905+
906+
// Remove spurious JumpTable at EntryAddress caused by PIC reference from
907+
// the load instruction.
908+
BC.deleteJumpTable(EntryAddress);
909+
910+
// Replace FixedEntryDispExpr used in target address calculation with outer
911+
// jump table reference.
912+
JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart);
913+
assert(JT && "Must have a containing jump table for PIC fixed branch");
914+
BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(),
915+
EntryAddress - ArrayStart, &*BC.Ctx);
916+
917+
return BranchType;
918+
}
919+
880920
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
881921
<< Twine::utohexstr(ArrayStart) << '\n');
882922

@@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
11261166
}
11271167
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
11281168
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
1169+
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
11291170
if (opts::JumpTables == JTS_NONE)
11301171
IsSimple = false;
11311172
break;
@@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches(
18781919
int64_t DispValue;
18791920
const MCExpr *DispExpr;
18801921
MCInst *PCRelBaseInstr;
1922+
MCInst *FixedEntryLoadInstr;
18811923
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
18821924
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
1883-
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
1925+
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr,
1926+
FixedEntryLoadInstr);
18841927
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
18851928
continue;
18861929

bolt/lib/Passes/IndirectCallPromotion.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
386386
JumpTableInfoType HotTargets;
387387
MCInst *MemLocInstr;
388388
MCInst *PCRelBaseOut;
389+
MCInst *FixedEntryLoadInstr;
389390
unsigned BaseReg, IndexReg;
390391
int64_t DispValue;
391392
const MCExpr *DispExpr;
392393
MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
393394
const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
394395
CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
395-
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
396+
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut,
397+
FixedEntryLoadInstr);
396398

397399
assert(MemLocInstr && "There should always be a load for jump tables");
398400
if (!MemLocInstr)

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -852,16 +852,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
852852
return Uses;
853853
}
854854

855-
IndirectBranchType analyzeIndirectBranch(
856-
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
857-
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
858-
unsigned &IndexRegNumOut, int64_t &DispValueOut,
859-
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
855+
IndirectBranchType
856+
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
857+
InstructionIterator End, const unsigned PtrSize,
858+
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
859+
unsigned &IndexRegNumOut, int64_t &DispValueOut,
860+
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
861+
MCInst *&FixedEntryLoadInstr) const override {
860862
MemLocInstrOut = nullptr;
861863
BaseRegNumOut = AArch64::NoRegister;
862864
IndexRegNumOut = AArch64::NoRegister;
863865
DispValueOut = 0;
864866
DispExprOut = nullptr;
867+
FixedEntryLoadInstr = nullptr;
865868

866869
// An instruction referencing memory used by jump instruction (directly or
867870
// via register). This location could be an array of function pointers

bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,14 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
176176
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
177177
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
178178
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
179-
MCInst *&PCRelBaseOut) const override {
179+
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override {
180180
MemLocInstr = nullptr;
181181
BaseRegNum = 0;
182182
IndexRegNum = 0;
183183
DispValue = 0;
184184
DispExpr = nullptr;
185185
PCRelBaseOut = nullptr;
186+
FixedEntryLoadInst = nullptr;
186187

187188
// Check for the following long tail call sequence:
188189
// 1: auipc xi, %pcrel_hi(sym)

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,8 +1866,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
18661866
return true;
18671867
}
18681868

1869+
/// Analyzes PIC-style jump table code template and return identified
1870+
/// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
1871+
/// (POSSIBLE_PIC_FIXED_BRANCH case).
18691872
template <typename Itr>
1870-
std::pair<IndirectBranchType, MCInst *>
1873+
std::tuple<IndirectBranchType, MCInst *, MCInst *>
18711874
analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
18721875
// Analyze PIC-style jump table code template:
18731876
//
@@ -1876,6 +1879,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
18761879
// add %r2, %r1
18771880
// jmp *%r1
18781881
//
1882+
// or a fixed indirect jump template:
1883+
//
1884+
// movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr
1885+
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
1886+
// add %r2, %r1
1887+
// jmp *%r1
1888+
//
18791889
// (with any irrelevant instructions in-between)
18801890
//
18811891
// When we call this helper we've already determined %r1 and %r2, and
@@ -1916,8 +1926,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
19161926
MO.SegRegNum == X86::NoRegister;
19171927
};
19181928
LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
1919-
MCInst *MemLocInstr = nullptr;
1920-
const MCInst *MovInstr = nullptr;
1929+
MCInst *FirstInstr = nullptr;
1930+
MCInst *SecondInstr = nullptr;
1931+
enum {
1932+
NOMATCH = 0,
1933+
MATCH_JUMP_TABLE,
1934+
MATCH_FIXED_BRANCH,
1935+
} MatchingState = NOMATCH;
19211936
while (++II != IE) {
19221937
MCInst &Instr = *II;
19231938
const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
@@ -1926,68 +1941,76 @@ class X86MCPlusBuilder : public MCPlusBuilder {
19261941
// Ignore instructions that don't affect R1, R2 registers.
19271942
continue;
19281943
}
1929-
if (!MovInstr) {
1930-
// Expect to see MOV instruction.
1931-
if (!isMOVSX64rm32(Instr)) {
1932-
LLVM_DEBUG(dbgs() << "MOV instruction expected.\n");
1944+
const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
1945+
const bool IsLEAInstr = isLEA64r(Instr);
1946+
if (MatchingState == NOMATCH) {
1947+
if (IsMOVSXInstr)
1948+
MatchingState = MATCH_JUMP_TABLE;
1949+
else if (IsLEAInstr)
1950+
MatchingState = MATCH_FIXED_BRANCH;
1951+
else
19331952
break;
1934-
}
19351953

1936-
// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
1937-
// If it sets %r1 - rename the registers so we have to only check
1938-
// a single form.
1939-
unsigned MovDestReg = Instr.getOperand(0).getReg();
1940-
if (MovDestReg != R2)
1954+
// Check if the first instruction is setting %r1 or %r2. In canonical
1955+
// form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
1956+
// we have to only check a single form.
1957+
unsigned DestReg = Instr.getOperand(0).getReg();
1958+
MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
1959+
if (DestReg != ExpectReg)
19411960
std::swap(R1, R2);
1942-
if (MovDestReg != R2) {
1943-
LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n");
1961+
if (DestReg != ExpectReg)
19441962
break;
1945-
}
19461963

1947-
// Verify operands for MOV.
1964+
// Verify operands
19481965
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
19491966
if (!MO)
19501967
break;
1951-
if (!isIndexed(*MO, R1))
1952-
// POSSIBLE_PIC_JUMP_TABLE
1968+
if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
1969+
(MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
1970+
FirstInstr = &Instr;
1971+
else
19531972
break;
1954-
MovInstr = &Instr;
19551973
} else {
1956-
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo))
1974+
unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
1975+
if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
19571976
continue;
1958-
if (!isLEA64r(Instr)) {
1959-
LLVM_DEBUG(dbgs() << "LEA instruction expected\n");
1977+
if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
1978+
(MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
19601979
break;
1961-
}
1962-
if (Instr.getOperand(0).getReg() != R1) {
1963-
LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n");
1980+
if (Instr.getOperand(0).getReg() != ExpectReg)
19641981
break;
1965-
}
19661982

1967-
// Verify operands for LEA.
1983+
// Verify operands.
19681984
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
19691985
if (!MO)
19701986
break;
19711987
if (!isRIPRel(*MO))
19721988
break;
1973-
MemLocInstr = &Instr;
1989+
SecondInstr = &Instr;
19741990
break;
19751991
}
19761992
}
19771993

1978-
if (!MemLocInstr)
1979-
return std::make_pair(IndirectBranchType::UNKNOWN, nullptr);
1994+
if (!SecondInstr)
1995+
return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);
19801996

1997+
if (MatchingState == MATCH_FIXED_BRANCH) {
1998+
LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
1999+
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
2000+
FirstInstr, SecondInstr);
2001+
}
19812002
LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
1982-
return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
1983-
MemLocInstr);
2003+
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
2004+
SecondInstr, nullptr);
19842005
}
19852006

1986-
IndirectBranchType analyzeIndirectBranch(
1987-
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
1988-
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
1989-
unsigned &IndexRegNumOut, int64_t &DispValueOut,
1990-
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
2007+
IndirectBranchType
2008+
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
2009+
InstructionIterator End, const unsigned PtrSize,
2010+
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
2011+
unsigned &IndexRegNumOut, int64_t &DispValueOut,
2012+
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
2013+
MCInst *&FixedEntryLoadInst) const override {
19912014
// Try to find a (base) memory location from where the address for
19922015
// the indirect branch is loaded. For X86-64 the memory will be specified
19932016
// in the following format:
@@ -2014,6 +2037,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
20142037
IndexRegNumOut = X86::NoRegister;
20152038
DispValueOut = 0;
20162039
DispExprOut = nullptr;
2040+
FixedEntryLoadInst = nullptr;
20172041

20182042
std::reverse_iterator<InstructionIterator> II(End);
20192043
std::reverse_iterator<InstructionIterator> IE(Begin);
@@ -2046,7 +2070,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
20462070
unsigned R2 = PrevInstr.getOperand(2).getReg();
20472071
if (R1 == R2)
20482072
return IndirectBranchType::UNKNOWN;
2049-
std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2);
2073+
std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
2074+
analyzePICJumpTable(PrevII, IE, R1, R2);
20502075
break;
20512076
}
20522077
return IndirectBranchType::UNKNOWN;
@@ -2090,6 +2115,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
20902115
if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
20912116
return IndirectBranchType::UNKNOWN;
20922117
break;
2118+
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
2119+
break;
20932120
default:
20942121
if (MO->ScaleImm != PtrSize)
20952122
return IndirectBranchType::UNKNOWN;

bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ main:
66
jae .L4
77
cmpq $0x1, %rdi
88
jne .L4
9-
mov .Ljt_pic+8(%rip), %rax
9+
movslq .Ljt_pic+8(%rip), %rax
1010
lea .Ljt_pic(%rip), %rdx
1111
add %rdx, %rax
1212
jmpq *%rax

0 commit comments

Comments
 (0)