Skip to content

Commit 96e5ee2

Browse files
authored
[BOLT][AArch64] Add partial support for lite mode (#133014)
In lite mode, we only emit code for a subset of functions while preserving the original code in .bolt.org.text. This requires updating code references in non-emitted functions to ensure that: (1) non-optimized versions of the optimized code never execute, and (2) function pointer comparison semantics are preserved. On x86-64, we can update code references in-place using "pending relocations" added in scanExternalRefs(). However, on AArch64, this is not always possible due to address range limitations and linker address "relaxation". There are two types of code-to-code references: control transfer (e.g., calls and branches) and function pointer materialization. AArch64-specific control transfer instructions are covered by #116964. For function pointer materialization, simply changing the immediate field of an instruction is not always sufficient. In some cases, we need to modify a pair of instructions, such as undoing linker relaxation and converting a NOP+ADR sequence into an ADRP+ADD sequence. To achieve this, we use the instruction patch mechanism instead of pending relocations. Instruction patches are emitted via the regular MC layer, just like regular functions. However, they have a fixed address and do not have an associated symbol table entry. This allows us to make more complex changes to the code, ensuring that function pointers are correctly updated. Such a mechanism should also be portable to RISC-V and other architectures. To summarize, for AArch64, we extend the scanExternalRefs() process to undo linker relaxation and use instruction patches to partially overwrite unoptimized code.
1 parent 0ed4bdf commit 96e5ee2

File tree

9 files changed

+282
-23
lines changed

9 files changed

+282
-23
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -544,9 +544,10 @@ class BinaryContext {
544544
///
545545
/// Optional \p Name can be assigned to the patch. The name will be emitted to
546546
/// the symbol table at \p Address.
547-
BinaryFunction *createInstructionPatch(uint64_t Address,
548-
InstructionListType &Instructions,
549-
const Twine &Name = "");
547+
BinaryFunction *
548+
createInstructionPatch(uint64_t Address,
549+
const InstructionListType &Instructions,
550+
const Twine &Name = "");
550551

551552
std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
552553
return InjectedBinaryFunctions;

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,12 @@ class BinaryFunction {
357357
/// True if another function body was merged into this one.
358358
bool HasFunctionsFoldedInto{false};
359359

360+
/// True if the function is used for patching code at a fixed address.
361+
bool IsPatch{false};
362+
363+
/// True if the function should not have an associated symbol table entry.
364+
bool IsAnonymous{false};
365+
360366
/// Name for the section this function code should reside in.
361367
std::string CodeSectionName;
362368

@@ -1358,6 +1364,12 @@ class BinaryFunction {
13581364
/// Return true if other functions were folded into this one.
13591365
bool hasFunctionsFoldedInto() const { return HasFunctionsFoldedInto; }
13601366

1367+
/// Return true if this function is used for patching existing code.
1368+
bool isPatch() const { return IsPatch; }
1369+
1370+
/// Return true if the function should not have an associated symbol table entry.
1371+
bool isAnonymous() const { return IsAnonymous; }
1372+
13611373
/// If this function was folded, return the function it was folded into.
13621374
BinaryFunction *getFoldedIntoFunction() const { return FoldedIntoFunction; }
13631375

@@ -1734,6 +1746,18 @@ class BinaryFunction {
17341746
/// Indicate that another function body was merged with this function.
17351747
void setHasFunctionsFoldedInto() { HasFunctionsFoldedInto = true; }
17361748

1749+
/// Indicate that this function is a patch.
1750+
void setIsPatch(bool V) {
1751+
assert(isInjected() && "Only injected functions can be used as patches");
1752+
IsPatch = V;
1753+
}
1754+
1755+
/// Indicate whether the function is anonymous, i.e. should not have an entry in the symbol table.
1756+
void setAnonymous(bool V) {
1757+
assert(isInjected() && "Only injected functions could be anonymous");
1758+
IsAnonymous = V;
1759+
}
1760+
17371761
void setHasSDTMarker(bool V) { HasSDTMarker = V; }
17381762

17391763
/// Mark the function as using ORC format for stack unwinding.

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,9 +1264,12 @@ class MCPlusBuilder {
12641264
return nullptr;
12651265
}
12661266

1267-
/// Return MCSymbol extracted from a target expression
1267+
/// Return MCSymbol extracted from the expression.
12681268
virtual const MCSymbol *getTargetSymbol(const MCExpr *Expr) const {
1269-
return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();
1269+
if (auto *SymbolRefExpr = dyn_cast<const MCSymbolRefExpr>(Expr))
1270+
return &SymbolRefExpr->getSymbol();
1271+
1272+
return nullptr;
12701273
}
12711274

12721275
/// Return addend that represents an offset from MCSymbol target

bolt/lib/Core/BinaryContext.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2401,8 +2401,10 @@ BinaryContext::createInjectedBinaryFunction(const std::string &Name,
24012401
return BF;
24022402
}
24032403

2404-
BinaryFunction *BinaryContext::createInstructionPatch(
2405-
uint64_t Address, InstructionListType &Instructions, const Twine &Name) {
2404+
BinaryFunction *
2405+
BinaryContext::createInstructionPatch(uint64_t Address,
2406+
const InstructionListType &Instructions,
2407+
const Twine &Name) {
24062408
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
24072409
assert(Section && "cannot get section for patching");
24082410
assert(Section->hasSectionRef() && Section->isText() &&
@@ -2423,6 +2425,11 @@ BinaryFunction *BinaryContext::createInstructionPatch(
24232425
PBF->setFileOffset(FileOffset);
24242426
PBF->setOriginSection(&Section.get());
24252427
PBF->addBasicBlock()->addInstructions(Instructions);
2428+
PBF->setIsPatch(true);
2429+
2430+
// Don't create symbol table entry if the name wasn't specified.
2431+
if (Name.str().empty())
2432+
PBF->setAnonymous(true);
24262433

24272434
return PBF;
24282435
}

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 126 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,13 +1583,18 @@ bool BinaryFunction::scanExternalRefs() {
15831583
assert(FunctionData.size() == getMaxSize() &&
15841584
"function size does not match raw data size");
15851585

1586-
if (BC.isX86())
1587-
BC.SymbolicDisAsm->setSymbolizer(
1588-
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1586+
BC.SymbolicDisAsm->setSymbolizer(
1587+
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
1588+
1589+
// A list of patches for this function.
1590+
using PatchTy = std::pair<uint64_t, MCInst>;
1591+
std::vector<PatchTy> InstructionPatches;
15891592

15901593
// Disassemble contents of the function. Detect code entry points and create
15911594
// relocations for references to code that will be moved.
15921595
uint64_t Size = 0; // instruction size
1596+
MCInst Instruction;
1597+
MCInst PrevInstruction;
15931598
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
15941599
// Check for data inside code and ignore it
15951600
if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
@@ -1598,7 +1603,7 @@ bool BinaryFunction::scanExternalRefs() {
15981603
}
15991604

16001605
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
1601-
MCInst Instruction;
1606+
PrevInstruction = Instruction;
16021607
if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
16031608
FunctionData.slice(Offset),
16041609
AbsoluteInstrAddr, nulls())) {
@@ -1673,12 +1678,108 @@ bool BinaryFunction::scanExternalRefs() {
16731678
if (BranchTargetSymbol) {
16741679
BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
16751680
Emitter.LocalCtx.get());
1676-
} else if (!llvm::any_of(Instruction,
1677-
[](const MCOperand &Op) { return Op.isExpr(); })) {
1678-
// Skip assembly if the instruction may not have any symbolic operands.
1679-
continue;
16801681
} else {
16811682
analyzeInstructionForFuncReference(Instruction);
1683+
const bool NeedsPatch = llvm::any_of(
1684+
MCPlus::primeOperands(Instruction), [&](const MCOperand &Op) {
1685+
return Op.isExpr() &&
1686+
!ignoreReference(BC.MIB->getTargetSymbol(Op.getExpr()));
1687+
});
1688+
if (!NeedsPatch)
1689+
continue;
1690+
}
1691+
1692+
// For AArch64, we need to undo relaxation done by the linker if the target
1693+
// of the instruction is a function that we plan to move.
1694+
//
1695+
// Linker relaxation is documented at:
1696+
// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
1697+
// under #relocation-optimization.
1698+
if (const Relocation *Rel;
1699+
BC.isAArch64() && (Rel = getRelocationAt(Offset))) {
1700+
// NOP+ADR sequence can originate from either ADRP+ADD or ADRP+LDR.
1701+
// In either case, we convert it into ADRP+ADD.
1702+
if (BC.MIB->isADR(Instruction) &&
1703+
(Rel->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
1704+
Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC)) {
1705+
if (!BC.MIB->isNoop(PrevInstruction)) {
1706+
// In case of unexpected conversion from the linker, skip target
1707+
// optimization.
1708+
const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Instruction);
1709+
BC.errs() << "BOLT-WARNING: cannot undo linker relaxation for "
1710+
"instruction at 0x"
1711+
<< Twine::utohexstr(AbsoluteInstrAddr) << " referencing "
1712+
<< Symbol->getName() << '\n';
1713+
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol))
1714+
TargetBF->setIgnored();
1715+
continue;
1716+
}
1717+
1718+
InstructionListType AdrpAdd =
1719+
BC.MIB->undoAdrpAddRelaxation(Instruction, BC.Ctx.get());
1720+
assert(AdrpAdd.size() == 2 && "Two instructions expected");
1721+
LLVM_DEBUG({
1722+
dbgs() << "BOLT-DEBUG: linker relaxation undone for instruction "
1723+
"at 0x"
1724+
<< Twine::utohexstr(AbsoluteInstrAddr) << '\n';
1725+
});
1726+
InstructionPatches.push_back({AbsoluteInstrAddr - 4, AdrpAdd[0]});
1727+
InstructionPatches.push_back({AbsoluteInstrAddr, AdrpAdd[1]});
1728+
continue;
1729+
}
1730+
1731+
// If ADR was emitted by the compiler/assembler to reference a nearby
1732+
// local function, we cannot move that function away due to ADR address
1733+
// span limitation. Hence, we skip the optimization.
1734+
if (BC.MIB->isADR(Instruction) &&
1735+
Rel->Type == ELF::R_AARCH64_ADR_PREL_LO21) {
1736+
BC.errs() << "BOLT-WARNING: unable to convert ADR that references "
1737+
<< Rel->Symbol->getName()
1738+
<< ". Will not optimize the target\n";
1739+
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
1740+
TargetBF->setIgnored();
1741+
continue;
1742+
}
1743+
1744+
// In the case of GOT load, ADRP+LDR can also be converted into ADRP+ADD.
1745+
// When this happens, it's not always possible to properly symbolize ADRP
1746+
// operand and we might have to adjust the operand based on the next
1747+
// instruction.
1748+
if (BC.MIB->isAddXri(Instruction) &&
1749+
Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
1750+
if (!BC.MIB->matchAdrpAddPair(PrevInstruction, Instruction)) {
1751+
BC.errs() << "BOLT-ERROR: cannot find matching ADRP for relaxed LDR "
1752+
"instruction at 0x"
1753+
<< Twine::utohexstr(AbsoluteInstrAddr) << '\n';
1754+
exit(1);
1755+
}
1756+
1757+
// Check if ADRP was already patched. If not, add a new patch for it.
1758+
if (InstructionPatches.empty() ||
1759+
InstructionPatches.back().first != AbsoluteInstrAddr - 4)
1760+
InstructionPatches.push_back(
1761+
{AbsoluteInstrAddr - 4, PrevInstruction});
1762+
1763+
// Adjust the operand for ADRP from the patch.
1764+
MCInst &ADRPInst = InstructionPatches.back().second;
1765+
const MCSymbol *ADRPSymbol = BC.MIB->getTargetSymbol(ADRPInst);
1766+
const MCSymbol *ADDSymbol = BC.MIB->getTargetSymbol(Instruction);
1767+
if (ADRPSymbol != ADDSymbol) {
1768+
const int64_t Addend = BC.MIB->getTargetAddend(Instruction);
1769+
BC.MIB->setOperandToSymbolRef(ADRPInst, /*OpNum*/ 1, ADDSymbol,
1770+
Addend, BC.Ctx.get(),
1771+
ELF::R_AARCH64_NONE);
1772+
}
1773+
}
1774+
}
1775+
1776+
// On AArch64, we use instruction patches for fixing references. We make an
1777+
// exception for branch instructions since they require optional
1778+
// relocations.
1779+
if (BC.isAArch64() && !BranchTargetSymbol) {
1780+
LLVM_DEBUG(BC.printInstruction(dbgs(), Instruction, AbsoluteInstrAddr));
1781+
InstructionPatches.push_back({AbsoluteInstrAddr, Instruction});
1782+
continue;
16821783
}
16831784

16841785
// Emit the instruction using temp emitter and generate relocations.
@@ -1720,6 +1821,23 @@ bool BinaryFunction::scanExternalRefs() {
17201821
for (Relocation &Rel : FunctionRelocations)
17211822
getOriginSection()->addPendingRelocation(Rel);
17221823

1824+
// Add patches, grouping those at consecutive instruction addresses together.
1825+
if (!InstructionPatches.empty()) {
1826+
uint64_t PatchGroupAddress;
1827+
InstructionListType PatchGroup;
1828+
for (auto PI = InstructionPatches.begin(), PE = InstructionPatches.end();
1829+
PI != PE; ++PI) {
1830+
auto &Patch = *PI;
1831+
if (PatchGroup.empty())
1832+
PatchGroupAddress = Patch.first;
1833+
PatchGroup.push_back(Patch.second);
1834+
if (std::next(PI) == PE || std::next(PI)->first != Patch.first + 4) {
1835+
BC.createInstructionPatch(PatchGroupAddress, PatchGroup);
1836+
PatchGroup.clear();
1837+
}
1838+
}
1839+
}
1840+
17231841
// Inform BinaryContext that this function symbols will not be defined and
17241842
// relocations should not be created against them.
17251843
if (BC.HasRelocations) {

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,8 +1269,10 @@ Error SimplifyRODataLoads::runOnFunctions(BinaryContext &BC) {
12691269

12701270
Error AssignSections::runOnFunctions(BinaryContext &BC) {
12711271
for (BinaryFunction *Function : BC.getInjectedBinaryFunctions()) {
1272-
Function->setCodeSectionName(BC.getInjectedCodeSectionName());
1273-
Function->setColdCodeSectionName(BC.getInjectedColdCodeSectionName());
1272+
if (!Function->isPatch()) {
1273+
Function->setCodeSectionName(BC.getInjectedCodeSectionName());
1274+
Function->setColdCodeSectionName(BC.getInjectedColdCodeSectionName());
1275+
}
12741276
}
12751277

12761278
// In non-relocation mode functions have pre-assigned section names.

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5078,6 +5078,8 @@ void RewriteInstance::updateELFSymbolTable(
50785078

50795079
// Add symbols of injected functions
50805080
for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
5081+
if (Function->isAnonymous())
5082+
continue;
50815083
ELFSymTy NewSymbol;
50825084
BinarySection *OriginSection = Function->getOriginSection();
50835085
NewSymbol.st_shndx =

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,12 +1803,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
18031803
return &SymExpr->getSymbol();
18041804
}
18051805

1806-
// This is the same as the base class, but since we are overriding one of
1807-
// getTargetSymbol's signatures above, we need to override all of them.
1808-
const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
1809-
return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();
1810-
}
1811-
18121806
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
18131807
const MCSymbol *&TBB, const MCSymbol *&FBB,
18141808
MCInst *&CondBranch,

0 commit comments

Comments
 (0)