Skip to content

[BOLT] Add auto parsing for Linux kernel .altinstructions #95068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 67 additions & 5 deletions bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {

/// Handle alternative instruction info from .altinstructions.
Error readAltInstructions();
/// Parse .altinstructions assuming the "feature" field occupies
/// \p AltInstFeatureSize bytes and that a "padlen" byte follows the size
/// fields iff \p AltInstHasPadLen is set. With \p ParseOnly set, entries are
/// still read and checked, but the per-entry processing that follows the
/// checks (annotating the alternatives) is skipped, which lets a caller probe
/// a candidate layout first. Returns an error if the section cannot be read
/// with the given layout.
Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
bool AltInstHasPadLen, bool ParseOnly);
Error rewriteAltInstructions();

/// Read .pci_fixup
Expand Down Expand Up @@ -1319,12 +1321,69 @@ Error LinuxKernelRewriter::rewriteBugTable() {
/// u8 padlen; // present in older kernels
/// } __packed;
///
/// Note the structures is packed.
/// Note that the structure is packed.
///
/// Since the size of the "feature" field could be either u16 or u32, and
/// "padlen" presence is unknown, we attempt to parse .altinstructions section
/// using all possible combinations (four at this time). Since we validate the
/// contents of the section and its size, the detection works quite well.
/// Still, we leave the user the opportunity to specify these features on the
/// command line and skip the guesswork.
Error LinuxKernelRewriter::readAltInstructions() {
  AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
  if (!AltInstrSection)
    return Error::success();

  // An option given explicitly on the command line pins the corresponding
  // field; otherwise every known variant of that field becomes a candidate.
  const bool PadLenIsPinned = opts::AltInstHasPadLen.getNumOccurrences() != 0;
  const bool FeatureSizeIsPinned =
      opts::AltInstFeatureSize.getNumOccurrences() != 0;

  // Candidate answers to "is the padlen field present?".
  std::vector<bool> PadLenCandidates;
  if (!PadLenIsPinned)
    PadLenCandidates = {false, true};
  else
    PadLenCandidates.push_back(opts::AltInstHasPadLen);

  // Candidate widths, in bytes, of the "feature" field.
  std::vector<uint32_t> FeatureSizeCandidates;
  if (!FeatureSizeIsPinned)
    FeatureSizeCandidates = {2, 4};
  else
    FeatureSizeCandidates.push_back(opts::AltInstFeatureSize);

  for (const bool HasPadLen : PadLenCandidates) {
    for (const uint32_t FeatureSize : FeatureSizeCandidates) {
      LLVM_DEBUG({
        dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << HasPadLen
               << "; AltInstFeatureSize = " << FeatureSize << ";\n";
      });

      // Probe this layout in ParseOnly mode; a failure merely rules it out,
      // so the error is dropped and the next combination is tried.
      Error ProbeError =
          tryReadAltInstructions(FeatureSize, HasPadLen, /*ParseOnly*/ true);
      if (ProbeError) {
        consumeError(std::move(ProbeError));
        continue;
      }

      LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");

      // Report only the values that were inferred rather than user-supplied.
      if (!PadLenIsPinned)
        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
                  << '=' << HasPadLen << '\n';

      if (!FeatureSizeIsPinned)
        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
                  << '=' << FeatureSize << '\n';

      // The layout is known now: read the section again, this time for real.
      return tryReadAltInstructions(FeatureSize, HasPadLen,
                                    /*ParseOnly*/ false);
    }
  }

  // No candidate layout parsed cleanly. Run the real read once more with the
  // option values so that the resulting error reaches the user.
  return tryReadAltInstructions(opts::AltInstFeatureSize,
                                opts::AltInstHasPadLen, /*ParseOnly*/ false);
}

Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
bool AltInstHasPadLen,
bool ParseOnly) {
const uint64_t Address = AltInstrSection->getAddress();
DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
BC.AsmInfo->isLittleEndian(),
Expand All @@ -1336,12 +1395,12 @@ Error LinuxKernelRewriter::readAltInstructions() {
Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
const uint64_t AltInstAddress =
Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
const uint64_t Feature = DE.getUnsigned(Cursor, AltInstFeatureSize);
const uint8_t OrgSize = DE.getU8(Cursor);
const uint8_t AltSize = DE.getU8(Cursor);

// Older kernels may have the padlen field.
const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0;
const uint8_t PadLen = AltInstHasPadLen ? DE.getU8(Cursor) : 0;

if (!Cursor)
return createStringError(
Expand All @@ -1358,7 +1417,7 @@ Error LinuxKernelRewriter::readAltInstructions() {
<< "\n\tFeature: 0x" << Twine::utohexstr(Feature)
<< "\n\tOrgSize: " << (int)OrgSize
<< "\n\tAltSize: " << (int)AltSize << '\n';
if (opts::AltInstHasPadLen)
if (AltInstHasPadLen)
BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
}

Expand All @@ -1375,7 +1434,7 @@ Error LinuxKernelRewriter::readAltInstructions() {

BinaryFunction *AltBF =
BC.getBinaryFunctionContainingAddress(AltInstAddress);
if (AltBF && BC.shouldEmit(*AltBF)) {
if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) {
BC.errs()
<< "BOLT-WARNING: alternative instruction sequence found in function "
<< *AltBF << '\n';
Expand All @@ -1397,6 +1456,9 @@ Error LinuxKernelRewriter::readAltInstructions() {
" referenced by .altinstructions entry %d",
OrgInstAddress, EntryID);

if (ParseOnly)
continue;

// There could be more than one alternative instruction sequences for the
// same original instruction. Annotate each alternative separately.
std::string AnnotationName = "AltInst";
Expand Down
20 changes: 13 additions & 7 deletions bolt/test/X86/linux-alt-instruction.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,30 @@
## Older kernels used to have padlen field in alt_instr. Check compatibility.

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \
# RUN: %s -o %t.o
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
# RUN: %s -o %t.padlen.o
# RUN: %clang %cflags -nostdlib %t.padlen.o -o %t.padlen.exe \
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-has-padlen -o %t.out \
# RUN: llvm-bolt %t.padlen.exe --print-normalized --alt-inst-has-padlen -o %t.padlen.out \
# RUN: | FileCheck %s

## Check with a larger size of "feature" field in alt_instr.

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.fs4.o
# RUN: %clang %cflags -nostdlib %t.fs4.o -o %t.fs4.exe \
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=4 -o %t.out \
# RUN: llvm-bolt %t.fs4.exe --print-normalized --alt-inst-feature-size=4 -o %t.fs4.out \
# RUN: | FileCheck %s

## Check that out-of-bounds read is handled properly.

# RUN: not llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=2 -o %t.out
# RUN: not llvm-bolt %t.fs4.exe --alt-inst-feature-size=2 -o %t.fs4.out

## Check that BOLT automatically detects structure fields in .altinstructions.

## Run detection on every binary variant rather than the same one three
## times: the padlen and 4-byte "feature" layouts built above are covered too.
# RUN: llvm-bolt %t.exe --print-normalized -o %t.out | FileCheck %s
# RUN: llvm-bolt %t.padlen.exe --print-normalized -o %t.padlen.out | FileCheck %s
# RUN: llvm-bolt %t.fs4.exe --print-normalized -o %t.fs4.out | FileCheck %s

# CHECK: BOLT-INFO: Linux kernel binary detected
# CHECK: BOLT-INFO: parsed 2 alternative instruction entries
Expand Down
Loading