Skip to content

[BOLT][Linux] Support ORC for alternative instructions #96709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,10 @@ class BinaryFunction {
return const_cast<BinaryFunction *>(this)->getInstructionAtOffset(Offset);
}

/// When the function is in disassembled state, return an instruction that
/// contains the \p Offset. Unlike getInstructionAtOffset(), \p Offset does
/// not have to be the starting offset of the instruction.
///
/// Returns nullptr if \p Offset is greater than the size of the function.
MCInst *getInstructionContainingOffset(uint64_t Offset);

std::optional<MCInst> disassembleInstructionAtOffset(uint64_t Offset) const;

/// Return offset for the first instruction. If there is data at the
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4472,6 +4472,18 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
}
}

/// Find the instruction that covers \p Offset while the function is in the
/// disassembled state.
///
/// The lookup returns the instruction with the greatest starting offset that
/// does not exceed \p Offset; instruction sizes are not consulted here.
///
/// \returns the matching instruction, or nullptr when \p Offset is greater
/// than the function size. Note that \p Offset equal to the function size is
/// accepted and resolves to the last instruction.
MCInst *BinaryFunction::getInstructionContainingOffset(uint64_t Offset) {
  assert(CurrentState == State::Disassembled && "Wrong function state");

  // Nothing can be located past the end of the function.
  if (Offset > Size)
    return nullptr;

  // upper_bound() yields the first instruction that starts strictly after
  // Offset, so the entry right before it starts at or before Offset.
  auto FollowingInst = Instructions.upper_bound(Offset);
  assert(FollowingInst != Instructions.begin() &&
         "First instruction not at offset 0");

  auto CoveringInst = FollowingInst;
  --CoveringInst;
  return &CoveringInst->second;
}

void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
if (!opts::shouldPrint(*this))
return;
Expand Down
31 changes: 25 additions & 6 deletions bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = processSMPLocks())
return E;

if (Error E = readORCTables())
return E;

if (Error E = readStaticCalls())
return E;

Expand All @@ -313,6 +310,11 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = readAltInstructions())
return E;

// Some ORC entries could be linked to alternative instruction
// sequences. Hence, we read ORC after .altinstructions.
if (Error E = readORCTables())
return E;

if (Error E = readPCIFixupTable())
return E;

Expand Down Expand Up @@ -563,11 +565,28 @@ Error LinuxKernelRewriter::readORCTables() {
if (!BF->hasInstructions())
continue;

MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
if (!Inst)
const uint64_t Offset = IP - BF->getAddress();
MCInst *Inst = BF->getInstructionAtOffset(Offset);
if (!Inst) {
  // No instruction starts exactly at this IP. Check if there is an
  // alternative instruction(s) at this IP. Multiple alternative instructions
  // can take the place of a single original instruction and each alternative
  // can have a separate ORC entry. Since the ORC table is shared between all
  // alternative sequences, there's a requirement that only one (out of many)
  // sequences can have an instruction from the ORC table to avoid
  // ambiguities/conflicts.
  //
  // For now, we have limited support for alternatives. I.e. we still print
  // functions with them, but will not change the code in the output binary.
  // As such, we can ignore alternative ORC entries. They will be preserved
  // in the binary, but will not get printed in the instruction stream.
  Inst = BF->getInstructionContainingOffset(Offset);

  // The entry is skipped only when an enclosing instruction exists AND it is
  // annotated as having alternatives. The null check must guard the
  // dereference: getInstructionContainingOffset() may return nullptr.
  if (Inst && BC.MIB->hasAnnotation(*Inst, "AltInst"))
    continue;

  // Otherwise the ORC entry points at an address with no matching
  // instruction and no alternative sequence to account for it.
  return createStringError(
      errc::executable_format_error,
      "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
}

// Some addresses will have two entries associated with them. The first
// one being a "weak" section terminator. Since we ignore the terminator,
Expand Down Expand Up @@ -1440,7 +1459,7 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
AltBF->setIgnored();
}

if (!BF || !BC.shouldEmit(*BF))
if (!BF || !BF->hasInstructions())
continue;

if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
Expand Down
49 changes: 47 additions & 2 deletions bolt/test/X86/linux-alt-instruction.s
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
# RUN: llvm-bolt %t.exe --print-cfg -o %t.fs4.out | FileCheck %s

# CHECK: BOLT-INFO: Linux kernel binary detected
# CHECK: BOLT-INFO: parsed 2 alternative instruction entries
# CHECK: BOLT-INFO: parsed 3 alternative instruction entries

.text
.globl _start
Expand All @@ -50,10 +50,12 @@ _start:
# CHECK: rdtsc
# CHECK-SAME: AltInst: 1
# CHECK-SAME: AltInst2: 2
# CHECK-SAME: AltInst3: 3
nop
# CHECK-NEXT: nop
# CHECK-SAME: AltInst: 1
# CHECK-SAME: AltInst2: 2
# CHECK-SAME: AltInst3: 3
nop
nop
.L1:
Expand All @@ -66,6 +68,9 @@ _start:
rdtsc
.A1:
rdtscp
.A2:
pushf
pop %rax
.Ae:

## Alternative instruction info.
Expand All @@ -92,11 +97,51 @@ _start:
.word 0x3b # feature flags
.endif
.byte .L1 - .L0 # org size
.byte .Ae - .A1 # alt size
.byte .A2 - .A1 # alt size
.ifdef PADLEN
.byte 0
.endif

.long .L0 - . # org instruction
.long .A2 - . # alt instruction
.ifdef FEATURE_SIZE_4
.long 0x110 # feature flags
.else
.word 0x110 # feature flags
.endif
.byte .L1 - .L0 # org size
.byte .Ae - .A2 # alt size
.ifdef PADLEN
.byte 0
.endif

## ORC unwind for "pushf; pop %rax" alternative sequence.
.section .orc_unwind,"a",@progbits
.align 4
.section .orc_unwind_ip,"a",@progbits
.align 4

.section .orc_unwind
.2byte 8
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long _start - .

.section .orc_unwind
.2byte 16
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long .L0 + 1 - .

.section .orc_unwind
.2byte 8
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long .L0 + 2 - .

## Fake Linux Kernel sections.
.section __ksymtab,"a",@progbits
.section __ksymtab_gpl,"a",@progbits
Loading