Skip to content

[BOLT][AArch64] Include constant islands in disassembly #125961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -1435,6 +1435,17 @@ class BinaryContext {
bool PrintRelocations = false,
StringRef Endl = "\n") const;

/// Print data when embedded in the instruction stream keeping the format
/// similar to printInstruction().
///
/// \p Data is written to \p OS one directive per line: every complete 4-byte
/// group as `.word`, then a remaining 2-byte group (if any) as `.short`, then
/// a remaining single byte (if any) as `.byte`. \p Offset is the offset of
/// the first byte of \p Data; it is added to the position inside \p Data to
/// form the offset printed at the start of each line.
void printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
uint64_t Offset) const;

/// Extract data from the binary corresponding to [Address, Address + Size)
/// range. Return an empty ArrayRef if the address range does not belong to
/// any section in the binary, crosses a section boundary, or falls into a
/// virtual section.
///
/// The returned ArrayRef is a view into the contents of the containing
/// section; no bytes are copied.
ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;

/// Print a range of instructions.
template <typename Itr>
uint64_t
Expand Down
5 changes: 5 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2060,6 +2060,11 @@ class BinaryFunction {
return Islands ? Islands->getAlignment() : 1;
}

/// If there is a constant island starting in the range
/// [StartOffset, EndOffset), return its offset; otherwise return
/// std::nullopt. When several islands start in the range, the one with the
/// lowest offset is returned.
///
/// NOTE(review): the returned value is an offset, not an absolute address --
/// the caller in BinaryFunction::print() adds getAddress() to it before
/// reading the bytes.
std::optional<uint64_t> getIslandInRange(uint64_t StartOffset,
uint64_t EndOffset) const;

uint64_t
estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const {
if (!Islands)
Expand Down
37 changes: 37 additions & 0 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1942,6 +1942,43 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
OS << " discriminator:" << Row.Discriminator;
}

/// Return a view of the section bytes backing [Address, Address + Size).
/// An empty ArrayRef is returned when no section contains \p Address, when
/// that section is virtual, or when the section does not contain the whole
/// range.
ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address,
                                             uint64_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);

  // Bytes are only available from a real section that covers the full range.
  const bool HasBackingBytes = Section && !Section->isVirtual() &&
                               Section->containsRange(Address, Size);
  if (!HasBackingBytes)
    return ArrayRef<uint8_t>();

  // Translate the address into a position inside the section contents.
  const uint64_t SectionOffset = Address - Section->getAddress();
  const auto *SectionBytes =
      reinterpret_cast<const uint8_t *>(Section->getContents().data());
  return ArrayRef<uint8_t>(SectionBytes + SectionOffset, Size);
}

/// Write \p Data to \p OS as data directives, one per line, each prefixed
/// with its offset (\p Offset plus the position inside \p Data).
///
/// All complete 4-byte groups are printed as `.word`; what is left over is
/// printed as at most one `.short` followed by at most one `.byte`.
void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
                              uint64_t Offset) const {
  DataExtractor Extractor(Data, AsmInfo->isLittleEndian(),
                          AsmInfo->getCodePointerSize());
  const uint64_t NumBytes = Data.size();
  uint64_t Pos = 0;

  // Number of bytes not yet printed; Pos never exceeds NumBytes.
  auto remaining = [&]() { return NumBytes - Pos; };

  // Full words first. The extractor advances Pos past each value it reads.
  while (remaining() >= 4) {
    OS << format(" %08" PRIx64 ": \t.word\t0x", Offset + Pos);
    const uint64_t Word = Extractor.getUnsigned(&Pos, 4);
    OS << Twine::utohexstr(Word) << '\n';
  }

  // At most three bytes are left: an optional half-word...
  if (remaining() >= 2) {
    OS << format(" %08" PRIx64 ": \t.short\t0x", Offset + Pos);
    const uint64_t Half = Extractor.getUnsigned(&Pos, 2);
    OS << Twine::utohexstr(Half) << '\n';
  }

  // ...and an optional trailing byte.
  if (remaining() == 1) {
    OS << format(" %08" PRIx64 ": \t.byte\t0x%x\n", Offset + Pos,
                 Data[Pos]);
  }
}

void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
uint64_t Offset,
const BinaryFunction *Function,
Expand Down
34 changes: 34 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,11 +491,27 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
// Offset of the instruction in function.
uint64_t Offset = 0;

// Print the constant island, if any, that starts in the half-open offset
// range [StartOffset, EndOffset). Nothing is printed when the range holds no
// island.
//
// Both arguments are offsets: every caller passes the end offset of the
// range as the second argument, so it must be forwarded unchanged. Treating
// it as a size (StartOffset + EndOffset) widens the range past the next
// instruction and lets a later island be printed early and more than once.
auto printConstantIslandInRange = [&](uint64_t StartOffset,
                                      uint64_t EndOffset) {
  const std::optional<uint64_t> IslandOffset =
      getIslandInRange(StartOffset, EndOffset);

  if (!IslandOffset)
    return;

  // extractData() takes an absolute address, printData() a function offset.
  const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset);
  BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize),
               *IslandOffset);
};

if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
uint64_t PrevOffset = 0;
for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
Offset = II.first;

// Print any constant islands in between the instructions.
printConstantIslandInRange(PrevOffset, Offset);

// Print label if exists at this offset.
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
Expand All @@ -506,7 +522,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
}

BC.printInstruction(OS, II.second, Offset, this);

PrevOffset = Offset;
}

// Print any data at the end of the function.
printConstantIslandInRange(PrevOffset, getMaxSize());
}

StringRef SplitPointMsg = "";
Expand Down Expand Up @@ -1048,6 +1069,19 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
return getSize() - Offset;
}

/// Return the lowest entry of Islands->DataOffsets that lies in the
/// half-open range [StartOffset, EndOffset), or std::nullopt if the function
/// has no island information or no entry falls inside the range.
std::optional<uint64_t>
BinaryFunction::getIslandInRange(uint64_t StartOffset,
                                 uint64_t EndOffset) const {
  if (!Islands)
    return std::nullopt;

  // Use the container's own lower_bound: the generic algorithm has to step
  // non-random-access iterators one element at a time, while the member
  // lookup of an ordered associative container is logarithmic.
  const auto Iter = Islands->DataOffsets.lower_bound(StartOffset);
  if (Iter != Islands->DataOffsets.end() && *Iter < EndOffset)
    return *Iter;

  return std::nullopt;
}

bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
ArrayRef<uint8_t> FunctionData = *getData();
uint64_t EndOfCode = getSize();
Expand Down
31 changes: 31 additions & 0 deletions bolt/test/AArch64/data-in-code.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
## Check that llvm-bolt prints data embedded in code.
##
## The same expected-output lines are matched twice: once against the
## llvm-objdump disassembly of the linked input and once against the
## llvm-bolt --print-disasm output.

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
# RUN: -fuse-ld=lld -Wl,-q

## Check disassembly of BOLT input.
# RUN: llvm-objdump %t.exe -d | FileCheck %s

# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s

.text
.balign 4

.global _start
.type _start, %function
_start:
mov x0, #0x0
## A 4-byte data word placed between two instructions.
.word 0x4f82e010
ret
## Three data bytes after the last instruction. The expected output below
## shows them as one 2-byte value (0xff00) followed by a single byte (0x42).
.byte 0x0, 0xff, 0x42
# CHECK-LABEL: _start
# CHECK: mov x0, #0x0
# CHECK-NEXT: .word 0x4f82e010
# CHECK-NEXT: ret
# CHECK-NEXT: .short 0xff00
# CHECK-NEXT: .byte 0x42
.size _start, .-_start

## Force relocation mode.
.reloc 0, R_AARCH64_NONE