[BOLT][AArch64] Add partial support for lite mode (#133014)

maksfb · web-flow · commit 96e5ee23a761 · 2025-03-27T21:33:25.000-07:00
In lite mode, we only emit code for a subset of functions while preserving the original code in .bolt.org.text. This requires updating code references in non-emitted functions to ensure that:

* Non-optimized versions of the optimized code never execute.
* Function pointer comparison semantics is preserved.

On x86-64, we can update code references in-place using "pending relocations" added in scanExternalRefs(). However, on AArch64, this is not always possible due to address range limitations and linker address "relaxation".

There are two types of code-to-code references: control transfer (e.g., calls and branches) and function pointer materialization. AArch64-specific control transfer instructions are covered by #116964.

For function pointer materialization, simply changing the immediate field of an instruction is not always sufficient. In some cases, we need to modify a pair of instructions, such as undoing linker relaxation and converting a NOP+ADR sequence into an ADRP+ADD sequence.

To achieve this, we use the instruction patch mechanism instead of pending relocations. Instruction patches are emitted via the regular MC layer, just like regular functions. However, they have a fixed address and do not have an associated symbol table entry. This allows us to make more complex changes to the code, ensuring that function pointers are correctly updated. Such a mechanism should also be portable to RISC-V and other architectures.

To summarize, for AArch64, we extend the scanExternalRefs() process to undo linker relaxation and use instruction patches to partially overwrite unoptimized code.
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
@@ -544,9 +544,10 @@ class BinaryContext {
   ///
   /// Optional \p Name can be assigned to the patch. The name will be emitted to
   /// the symbol table at \p Address.
-  BinaryFunction *createInstructionPatch(uint64_t Address,
-                                         InstructionListType &Instructions,
-                                         const Twine &Name = "");
+  BinaryFunction *
+  createInstructionPatch(uint64_t Address,
+                         const InstructionListType &Instructions,
+                         const Twine &Name = "");
 
   std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
     return InjectedBinaryFunctions;
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
@@ -357,6 +357,12 @@ class BinaryFunction {
   /// True if another function body was merged into this one.
   bool HasFunctionsFoldedInto{false};
 
+  /// True if the function is used for patching code at a fixed address.
+  bool IsPatch{false};
+
+  /// True if the function should not have an associated symbol table entry.
+  bool IsAnonymous{false};
+
   /// Name for the section this function code should reside in.
   std::string CodeSectionName;
 
@@ -1358,6 +1364,12 @@ class BinaryFunction {
   /// Return true if other functions were folded into this one.
   bool hasFunctionsFoldedInto() const { return HasFunctionsFoldedInto; }
 
+  /// Return true if this function is used for patching existing code.
+  bool isPatch() const { return IsPatch; }
+
+  /// Return true if the function should not have associated symbol table entry.
+  bool isAnonymous() const { return IsAnonymous; }
+
   /// If this function was folded, return the function it was folded into.
   BinaryFunction *getFoldedIntoFunction() const { return FoldedIntoFunction; }
 
@@ -1734,6 +1746,18 @@ class BinaryFunction {
   /// Indicate that another function body was merged with this function.
   void setHasFunctionsFoldedInto() { HasFunctionsFoldedInto = true; }
 
+  /// Indicate that this function is a patch.
+  void setIsPatch(bool V) {
+    assert(isInjected() && "Only injected functions can be used as patches");
+    IsPatch = V;
+  }
+
+  /// Indicate if the function should have a name in the symbol table.
+  void setAnonymous(bool V) {
+    assert(isInjected() && "Only injected functions could be anonymous");
+    IsAnonymous = V;
+  }
+
   void setHasSDTMarker(bool V) { HasSDTMarker = V; }
 
   /// Mark the function as using ORC format for stack unwinding.
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1264,9 +1264,12 @@ class MCPlusBuilder {
     return nullptr;
   }
 
-  /// Return MCSymbol extracted from a target expression
+  /// Return MCSymbol extracted from the expression.
   virtual const MCSymbol *getTargetSymbol(const MCExpr *Expr) const {
-    return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();
+    if (auto *SymbolRefExpr = dyn_cast<const MCSymbolRefExpr>(Expr))
+      return &SymbolRefExpr->getSymbol();
+
+    return nullptr;
   }
 
   /// Return addend that represents an offset from MCSymbol target
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
@@ -2401,8 +2401,10 @@ BinaryContext::createInjectedBinaryFunction(const std::string &Name,
   return BF;
 }
 
-BinaryFunction *BinaryContext::createInstructionPatch(
-    uint64_t Address, InstructionListType &Instructions, const Twine &Name) {
+BinaryFunction *
+BinaryContext::createInstructionPatch(uint64_t Address,
+                                      const InstructionListType &Instructions,
+                                      const Twine &Name) {
   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
   assert(Section && "cannot get section for patching");
   assert(Section->hasSectionRef() && Section->isText() &&
@@ -2423,6 +2425,11 @@ BinaryFunction *BinaryContext::createInstructionPatch(
   PBF->setFileOffset(FileOffset);
   PBF->setOriginSection(&Section.get());
   PBF->addBasicBlock()->addInstructions(Instructions);
+  PBF->setIsPatch(true);
+
+  // Don't create symbol table entry if the name wasn't specified.
+  if (Name.str().empty())
+    PBF->setAnonymous(true);
 
   return PBF;
 }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
@@ -1583,13 +1583,18 @@ bool BinaryFunction::scanExternalRefs() {
   assert(FunctionData.size() == getMaxSize() &&
          "function size does not match raw data size");
 
-  if (BC.isX86())
-    BC.SymbolicDisAsm->setSymbolizer(
-        BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
+  BC.SymbolicDisAsm->setSymbolizer(
+      BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
+
+  // A list of patches for this function.
+  using PatchTy = std::pair<uint64_t, MCInst>;
+  std::vector<PatchTy> InstructionPatches;
 
   // Disassemble contents of the function. Detect code entry points and create
   // relocations for references to code that will be moved.
   uint64_t Size = 0; // instruction size
+  MCInst Instruction;
+  MCInst PrevInstruction;
   for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
     // Check for data inside code and ignore it
     if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
@@ -1598,7 +1603,7 @@ bool BinaryFunction::scanExternalRefs() {
     }
 
     const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
-    MCInst Instruction;
+    PrevInstruction = Instruction;
     if (!BC.SymbolicDisAsm->getInstruction(Instruction, Size,
                                            FunctionData.slice(Offset),
                                            AbsoluteInstrAddr, nulls())) {
@@ -1673,12 +1678,108 @@ bool BinaryFunction::scanExternalRefs() {
     if (BranchTargetSymbol) {
       BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
                                   Emitter.LocalCtx.get());
-    } else if (!llvm::any_of(Instruction,
-                             [](const MCOperand &Op) { return Op.isExpr(); })) {
-      // Skip assembly if the instruction may not have any symbolic operands.
-      continue;
     } else {
       analyzeInstructionForFuncReference(Instruction);
+      const bool NeedsPatch = llvm::any_of(
+          MCPlus::primeOperands(Instruction), [&](const MCOperand &Op) {
+            return Op.isExpr() &&
+                   !ignoreReference(BC.MIB->getTargetSymbol(Op.getExpr()));
+          });
+      if (!NeedsPatch)
+        continue;
+    }
+
+    // For AArch64, we need to undo relaxation done by the linker if the target
+    // of the instruction is a function that we plan to move.
+    //
+    // Linker relaxation is documented at:
+    // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
+    // under #relocation-optimization.
+    if (const Relocation *Rel;
+        BC.isAArch64() && (Rel = getRelocationAt(Offset))) {
+      // NOP+ADR sequence can originate from either ADRP+ADD or ADRP+LDR.
+      // In either case, we convert it into ADRP+ADD.
+      if (BC.MIB->isADR(Instruction) &&
+          (Rel->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
+           Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC)) {
+        if (!BC.MIB->isNoop(PrevInstruction)) {
+          // In case of unexpected conversion from the linker, skip target
+          // optimization.
+          const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Instruction);
+          BC.errs() << "BOLT-WARNING: cannot undo linker relaxation for "
+                       "instruction at 0x"
+                    << Twine::utohexstr(AbsoluteInstrAddr) << " referencing "
+                    << Symbol->getName() << '\n';
+          if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol))
+            TargetBF->setIgnored();
+          continue;
+        }
+
+        InstructionListType AdrpAdd =
+            BC.MIB->undoAdrpAddRelaxation(Instruction, BC.Ctx.get());
+        assert(AdrpAdd.size() == 2 && "Two instructions expected");
+        LLVM_DEBUG({
+          dbgs() << "BOLT-DEBUG: linker relaxation undone for instruction "
+                    "at 0x"
+                 << Twine::utohexstr(AbsoluteInstrAddr) << '\n';
+        });
+        InstructionPatches.push_back({AbsoluteInstrAddr - 4, AdrpAdd[0]});
+        InstructionPatches.push_back({AbsoluteInstrAddr, AdrpAdd[1]});
+        continue;
+      }
+
+      // If ADR was emitted by the compiler/assembler to reference a nearby
+      // local function, we cannot move away that function due to ADR address
+      // span limitation. Hence, we skip the optimization.
+      if (BC.MIB->isADR(Instruction) &&
+          Rel->Type == ELF::R_AARCH64_ADR_PREL_LO21) {
+        BC.errs() << "BOLT-WARNING: unable to convert ADR that references "
+                  << Rel->Symbol->getName()
+                  << ". Will not optimize the target\n";
+        if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
+          TargetBF->setIgnored();
+        continue;
+      }
+
+      // In the case of GOT load, ADRP+LDR can also be converted into ADRP+ADD.
+      // When this happens, it's not always possible to properly symbolize ADRP
+      // operand and we might have to adjust the operand based on the next
+      // instruction.
+      if (BC.MIB->isAddXri(Instruction) &&
+          Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
+        if (!BC.MIB->matchAdrpAddPair(PrevInstruction, Instruction)) {
+          BC.errs() << "BOLT-ERROR: cannot find matching ADRP for relaxed LDR "
+                       "instruction at 0x"
+                    << Twine::utohexstr(AbsoluteInstrAddr) << '\n';
+          exit(1);
+        }
+
+        // Check if ADRP was already patched. If not, add a new patch for it.
+        if (InstructionPatches.empty() ||
+            InstructionPatches.back().first != AbsoluteInstrAddr - 4)
+          InstructionPatches.push_back(
+              {AbsoluteInstrAddr - 4, PrevInstruction});
+
+        // Adjust the operand for ADRP from the patch.
+        MCInst &ADRPInst = InstructionPatches.back().second;
+        const MCSymbol *ADRPSymbol = BC.MIB->getTargetSymbol(ADRPInst);
+        const MCSymbol *ADDSymbol = BC.MIB->getTargetSymbol(Instruction);
+        if (ADRPSymbol != ADDSymbol) {
+          const int64_t Addend = BC.MIB->getTargetAddend(Instruction);
+          BC.MIB->setOperandToSymbolRef(ADRPInst, /*OpNum*/ 1, ADDSymbol,
+                                        Addend, BC.Ctx.get(),
+                                        ELF::R_AARCH64_NONE);
+        }
+      }
+    }
+
+    // On AArch64, we use instruction patches for fixing references. We make an
+    // exception for branch instructions since they require optional
+    // relocations.
+    if (BC.isAArch64() && !BranchTargetSymbol) {
+      LLVM_DEBUG(BC.printInstruction(dbgs(), Instruction, AbsoluteInstrAddr));
+      InstructionPatches.push_back({AbsoluteInstrAddr, Instruction});
+      continue;
     }
 
     // Emit the instruction using temp emitter and generate relocations.
@@ -1720,6 +1821,23 @@ bool BinaryFunction::scanExternalRefs() {
     for (Relocation &Rel : FunctionRelocations)
       getOriginSection()->addPendingRelocation(Rel);
 
+  // Add patches grouping them together.
+  if (!InstructionPatches.empty()) {
+    uint64_t PatchGroupAddress;
+    InstructionListType PatchGroup;
+    for (auto PI = InstructionPatches.begin(), PE = InstructionPatches.end();
+         PI != PE; ++PI) {
+      auto &Patch = *PI;
+      if (PatchGroup.empty())
+        PatchGroupAddress = Patch.first;
+      PatchGroup.push_back(Patch.second);
+      if (std::next(PI) == PE || std::next(PI)->first != Patch.first + 4) {
+        BC.createInstructionPatch(PatchGroupAddress, PatchGroup);
+        PatchGroup.clear();
+      }
+    }
+  }
+
   // Inform BinaryContext that this function symbols will not be defined and
   // relocations should not be created against them.
   if (BC.HasRelocations) {
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1269,8 +1269,10 @@ Error SimplifyRODataLoads::runOnFunctions(BinaryContext &BC) {
 
 Error AssignSections::runOnFunctions(BinaryContext &BC) {
   for (BinaryFunction *Function : BC.getInjectedBinaryFunctions()) {
-    Function->setCodeSectionName(BC.getInjectedCodeSectionName());
-    Function->setColdCodeSectionName(BC.getInjectedColdCodeSectionName());
+    if (!Function->isPatch()) {
+      Function->setCodeSectionName(BC.getInjectedCodeSectionName());
+      Function->setColdCodeSectionName(BC.getInjectedColdCodeSectionName());
+    }
   }
 
   // In non-relocation mode functions have pre-assigned section names.
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5078,6 +5078,8 @@ void RewriteInstance::updateELFSymbolTable(
 
   // Add symbols of injected functions
   for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
+    if (Function->isAnonymous())
+      continue;
     ELFSymTy NewSymbol;
     BinarySection *OriginSection = Function->getOriginSection();
     NewSymbol.st_shndx =
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1803,12 +1803,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return &SymExpr->getSymbol();
   }
 
-  // This is the same as the base class, but since we are overriding one of
-  // getTargetSymbol's signatures above, we need to override all of them.
-  const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
-    return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();
-  }
-
   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
                      const MCSymbol *&TBB, const MCSymbol *&FBB,
                      MCInst *&CondBranch,
diff --git a/bolt/test/AArch64/lite-mode.s b/bolt/test/AArch64/lite-mode.s

Original file line number	Diff line number	Diff line change
`@@ -1264,9 +1264,12 @@ class MCPlusBuilder {`
`1264`	`1264`	`return nullptr;`
`1265`	`1265`	`}`
`1266`	`1266`
`1267`		`- /// Return MCSymbol extracted from a target expression`
	`1267`	`+ /// Return MCSymbol extracted from the expression.`
`1268`	`1268`	`virtual const MCSymbol *getTargetSymbol(const MCExpr *Expr) const {`
`1269`		`- return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();`
	`1269`	`+ if (auto *SymbolRefExpr = dyn_cast<const MCSymbolRefExpr>(Expr))`
	`1270`	`+ return &SymbolRefExpr->getSymbol();`
	`1271`	`+`
	`1272`	`+ return nullptr;`
`1270`	`1273`	`}`
`1271`	`1274`
`1272`	`1275`	`/// Return addend that represents an offset from MCSymbol target`