-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[BOLT] Add reading support for Linux kernel .altinstructions section #84283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,21 @@ using namespace bolt; | |
|
||
namespace opts { | ||
|
||
static cl::opt<bool> | ||
AltInstHasPadLen("alt-inst-has-padlen", | ||
cl::desc("specify that .altinstructions has padlen field"), | ||
cl::init(false), cl::Hidden, cl::cat(BoltCategory)); | ||
|
||
static cl::opt<uint32_t> | ||
AltInstFeatureSize("alt-inst-feature-size", | ||
cl::desc("size of feature field in .altinstructions"), | ||
cl::init(2), cl::Hidden, cl::cat(BoltCategory)); | ||
|
||
static cl::opt<bool> | ||
DumpAltInstructions("dump-alt-instructions", | ||
cl::desc("dump Linux alternative instructions info"), | ||
cl::init(false), cl::Hidden, cl::cat(BoltCategory)); | ||
|
||
static cl::opt<bool> | ||
DumpExceptions("dump-linux-exceptions", | ||
cl::desc("dump Linux kernel exception table"), | ||
|
@@ -157,6 +172,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { | |
/// Alignment of paravirtual patch structures. | ||
static constexpr size_t PARA_PATCH_ALIGN = 8; | ||
|
||
/// .altinstructions section. | ||
ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address; | ||
|
||
/// Section containing Linux bug table. | ||
ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; | ||
|
||
|
@@ -205,6 +223,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { | |
|
||
Error readBugTable(); | ||
|
||
/// Read alternative instruction info from .altinstructions. | ||
Error readAltInstructions(); | ||
|
||
/// Mark instructions referenced by kernel metadata. | ||
Error markInstructions(); | ||
|
||
|
@@ -232,6 +253,9 @@ class LinuxKernelRewriter final : public MetadataRewriter { | |
if (Error E = readBugTable()) | ||
return E; | ||
|
||
if (Error E = readAltInstructions()) | ||
return E; | ||
|
||
return Error::success(); | ||
} | ||
|
||
|
@@ -1132,6 +1156,123 @@ Error LinuxKernelRewriter::readBugTable() { | |
return Error::success(); | ||
} | ||
|
||
/// The kernel can replace certain instruction sequences depending on hardware | ||
/// it is running on and features specified during boot time. The information | ||
/// about alternative instruction sequences is stored in .altinstructions | ||
/// section. The format of entries in this section is defined in | ||
/// arch/x86/include/asm/alternative.h: | ||
/// | ||
/// struct alt_instr { | ||
/// s32 instr_offset; | ||
/// s32 repl_offset; | ||
/// uXX feature; | ||
/// u8 instrlen; | ||
/// u8 replacementlen; | ||
/// u8 padlen; // present in older kernels | ||
/// } __packed; | ||
/// | ||
/// Note the structures is packed. | ||
Error LinuxKernelRewriter::readAltInstructions() { | ||
AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); | ||
if (!AltInstrSection) | ||
return Error::success(); | ||
|
||
const uint64_t Address = AltInstrSection->getAddress(); | ||
DataExtractor DE = DataExtractor(AltInstrSection->getContents(), | ||
BC.AsmInfo->isLittleEndian(), | ||
BC.AsmInfo->getCodePointerSize()); | ||
uint64_t EntryID = 0; | ||
DataExtractor::Cursor Cursor(0); | ||
while (Cursor && !DE.eof(Cursor)) { | ||
const uint64_t OrgInstAddress = | ||
Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here you're parsing a signed 32-bit value as unsigned then casting. Is it always correct? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. That's what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't coding guide prefers using c++ style casting? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The rule lists an exception for integral type casts. |
||
const uint64_t AltInstAddress = | ||
Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); | ||
const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); | ||
const uint8_t OrgSize = DE.getU8(Cursor); | ||
const uint8_t AltSize = DE.getU8(Cursor); | ||
|
||
// Older kernels may have the padlen field. | ||
const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; | ||
|
||
if (!Cursor) | ||
return createStringError(errc::executable_format_error, | ||
"out of bounds while reading .altinstructions"); | ||
|
||
++EntryID; | ||
|
||
if (opts::DumpAltInstructions) { | ||
BC.outs() << "Alternative instruction entry: " << EntryID | ||
<< "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) | ||
<< "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) | ||
<< "\n\tFeature: 0x" << Twine::utohexstr(Feature) | ||
<< "\n\tOrgSize: " << (int)OrgSize | ||
<< "\n\tAltSize: " << (int)AltSize << '\n'; | ||
if (opts::AltInstHasPadLen) | ||
BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; | ||
} | ||
|
||
if (AltSize > OrgSize) | ||
return createStringError(errc::executable_format_error, | ||
"error reading .altinstructions"); | ||
|
||
BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); | ||
if (!BF && opts::Verbosity) { | ||
BC.outs() << "BOLT-INFO: no function matches address 0x" | ||
<< Twine::utohexstr(OrgInstAddress) | ||
<< " of instruction from .altinstructions\n"; | ||
} | ||
|
||
BinaryFunction *AltBF = | ||
BC.getBinaryFunctionContainingAddress(AltInstAddress); | ||
if (AltBF && BC.shouldEmit(*AltBF)) { | ||
BC.errs() | ||
<< "BOLT-WARNING: alternative instruction sequence found in function " | ||
<< *AltBF << '\n'; | ||
AltBF->setIgnored(); | ||
} | ||
|
||
if (!BF || !BC.shouldEmit(*BF)) | ||
continue; | ||
|
||
if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) | ||
return createStringError(errc::executable_format_error, | ||
"error reading .altinstructions"); | ||
|
||
MCInst *Inst = | ||
BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); | ||
if (!Inst) | ||
return createStringError(errc::executable_format_error, | ||
"no instruction at address 0x%" PRIx64 | ||
" referenced by .altinstructions entry %d", | ||
OrgInstAddress, EntryID); | ||
|
||
// There could be more than one alternative instruction sequences for the | ||
// same original instruction. Annotate each alternative separately. | ||
std::string AnnotationName = "AltInst"; | ||
unsigned N = 2; | ||
while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) | ||
AnnotationName = "AltInst" + std::to_string(N++); | ||
|
||
BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); | ||
|
||
// Annotate all instructions from the original sequence. Note that it's not | ||
// the most efficient way to look for instructions in the address range, | ||
// but since alternative instructions are uncommon, it will do for now. | ||
for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { | ||
Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - | ||
BF->getAddress()); | ||
if (Inst) | ||
BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); | ||
} | ||
} | ||
|
||
BC.outs() << "BOLT-INFO: parsed " << EntryID | ||
<< " alternative instruction entries\n"; | ||
|
||
return Error::success(); | ||
} | ||
|
||
} // namespace | ||
|
||
std::unique_ptr<MetadataRewriter> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# REQUIRES: system-linux | ||
|
||
## Check that BOLT correctly parses the Linux kernel .altinstructions section | ||
## and annotates alternative instructions. | ||
|
||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o | ||
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ | ||
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie | ||
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops -o %t.out \ | ||
# RUN: --alt-inst-feature-size=2 | FileCheck %s | ||
|
||
## Older kernels used to have the padlen field in alt_instr. Check compatibility. | ||
|
||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \ | ||
# RUN: %s -o %t.o | ||
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ | ||
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie | ||
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops --alt-inst-has-padlen \ | ||
# RUN: -o %t.out | FileCheck %s | ||
|
||
## Check with a larger size of "feature" field in alt_instr. | ||
|
||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ | ||
# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o | ||
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \ | ||
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie | ||
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops \ | ||
# RUN: --alt-inst-feature-size=4 -o %t.out | FileCheck %s | ||
|
||
# CHECK: BOLT-INFO: Linux kernel binary detected | ||
# CHECK: BOLT-INFO: parsed 2 alternative instruction entries | ||
|
||
.text | ||
.globl _start | ||
.type _start, %function | ||
_start: | ||
# CHECK: Binary Function "_start" | ||
.L0: | ||
rdtsc | ||
# CHECK: rdtsc | ||
# CHECK-SAME: AltInst: 1 | ||
# CHECK-SAME: AltInst2: 2 | ||
nop | ||
# CHECK-NEXT: nop | ||
# CHECK-SAME: AltInst: 1 | ||
# CHECK-SAME: AltInst2: 2 | ||
nop | ||
nop | ||
.L1: | ||
ret | ||
.size _start, .-_start | ||
|
||
.section .altinstr_replacement,"ax",@progbits | ||
.A0: | ||
lfence | ||
rdtsc | ||
.A1: | ||
rdtscp | ||
.Ae: | ||
|
||
## Alternative instruction info. | ||
.section .altinstructions,"a",@progbits | ||
|
||
.long .L0 - . # org instruction | ||
.long .A0 - . # alt instruction | ||
.ifdef FEATURE_SIZE_4 | ||
.long 0x72 # feature flags | ||
.else | ||
.word 0x72 # feature flags | ||
.endif | ||
.byte .L1 - .L0 # org size | ||
.byte .A1 - .A0 # alt size | ||
.ifdef PADLEN | ||
.byte 0 | ||
.endif | ||
|
||
.long .L0 - . # org instruction | ||
.long .A1 - . # alt instruction | ||
.ifdef FEATURE_SIZE_4 | ||
.long 0x3b # feature flags | ||
.else | ||
.word 0x3b # feature flags | ||
.endif | ||
.byte .L1 - .L0 # org size | ||
.byte .Ae - .A1 # alt size | ||
.ifdef PADLEN | ||
.byte 0 | ||
.endif | ||
|
||
## Fake Linux Kernel sections. | ||
.section __ksymtab,"a",@progbits | ||
.section __ksymtab_gpl,"a",@progbits |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what does uXX stand for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
u16 or u32. Perhaps u64 in the future.