Skip to content

Commit ad99067

Browse files
committed
[BOLT] Include constant islands in disassembly
When printing disassembly of a function with constant islands, include the island info in the dump. At the moment, only print islands in pre-CFG state. Include islands that are interleaved with instructions.
1 parent 1c4ee06 commit ad99067

File tree

5 files changed

+115
-0
lines changed

5 files changed

+115
-0
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,6 +1435,16 @@ class BinaryContext {
14351435
bool PrintRelocations = false,
14361436
StringRef Endl = "\n") const;
14371437

1438+
/// Print data when embedded in the instruction stream keeping the format
1439+
/// similar to printInstruction().
1440+
void printData(raw_ostream &OS, ArrayRef<uint8_t> Data, uint64_t Offset) const;
1441+
1442+
/// Extract data from the binary corresponding to [Address, Address + Size)
1443+
/// range. Return an empty ArrayRef if the address range does not belong to
1444+
/// any section in the binary, crosses a section boundary, or falls into a
1445+
/// virtual section.
1446+
ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;
1447+
14381448
/// Print a range of instructions.
14391449
template <typename Itr>
14401450
uint64_t

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,11 @@ class BinaryFunction {
20602060
return Islands ? Islands->getAlignment() : 1;
20612061
}
20622062

2063+
/// If there is a constant island in the range [StartOffset, EndOffset),
2064+
/// return its offset from the start of the function.
2065+
std::optional<uint64_t>
2066+
getIslandInRange(uint64_t StartOffset, uint64_t EndOffset) const;
2067+
20632068
uint64_t
20642069
estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const {
20652070
if (!Islands)

bolt/lib/Core/BinaryContext.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,6 +1942,42 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
19421942
OS << " discriminator:" << Row.Discriminator;
19431943
}
19441944

1945+
/// Return a view of the bytes backing the range [Address, Address + Size).
///
/// An empty ArrayRef is returned when no section is found for \p Address,
/// when that section is virtual, or when the section reports that it does
/// not contain the whole requested range.
ArrayRef<uint8_t>
BinaryContext::extractData(uint64_t Address, uint64_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section || Section->isVirtual())
    return ArrayRef<uint8_t>();

  if (!Section->containsRange(Address, Size))
    return ArrayRef<uint8_t>();

  // Compute the offset inside the section with integer arithmetic first.
  // Adding the absolute address to the buffer pointer and subtracting the
  // section address afterwards would form an intermediate pointer far outside
  // the section contents.
  const uint64_t SectionOffset = Address - Section->getAddress();
  const auto *Bytes =
      reinterpret_cast<const uint8_t *>(Section->getContents().data());
  return ArrayRef<uint8_t>(Bytes + SectionOffset, Size);
}
1960+
1961+
/// Dump \p Data to \p OS as assembler data directives, one per line.
///
/// The buffer is consumed front to back: every complete 4-byte group is
/// printed as a .word, then a remaining 2-byte group (if any) as a .short,
/// and finally a single leftover byte as a .byte. Each line is prefixed with
/// \p Offset plus the position of the value inside \p Data. Multi-byte values
/// are decoded using the endianness reported by AsmInfo.
void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
                              uint64_t Offset) const {
  DataExtractor Extractor(Data, AsmInfo->isLittleEndian(),
                          AsmInfo->getCodePointerSize());
  uint64_t Cursor = 0;

  // Full words first; getUnsigned() advances Cursor past each value it reads.
  while (Cursor + 4 <= Data.size()) {
    const uint64_t LineOffset = Offset + Cursor;
    const uint64_t WordValue = Extractor.getUnsigned(&Cursor, 4);
    OS << format(" %08" PRIx64 ": \t.word\t0x", LineOffset)
       << Twine::utohexstr(WordValue) << '\n';
  }

  // At most three bytes are left at this point: an optional half-word...
  if (Cursor + 2 <= Data.size()) {
    const uint64_t LineOffset = Offset + Cursor;
    const uint64_t ShortValue = Extractor.getUnsigned(&Cursor, 2);
    OS << format(" %08" PRIx64 ": \t.short\t0x", LineOffset)
       << Twine::utohexstr(ShortValue) << '\n';
  }

  // ...followed by an optional trailing byte.
  if (Cursor + 1 == Data.size())
    OS << format(" %08" PRIx64 ": \t.byte\t0x%x\n", Offset + Cursor,
                 Data[Cursor]);
}
1980+
19451981
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
19461982
uint64_t Offset,
19471983
const BinaryFunction *Function,

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,11 +491,27 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
491491
// Offset of the instruction in function.
492492
uint64_t Offset = 0;
493493

494+
auto printConstantIslandInRange = [&](uint64_t Offset, uint64_t Size) {
495+
std::optional<uint64_t> IslandOffset =
496+
getIslandInRange(Offset, Offset + Size);
497+
498+
if (!IslandOffset)
499+
return;
500+
501+
const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset);
502+
BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize),
503+
*IslandOffset);
504+
};
505+
494506
if (BasicBlocks.empty() && !Instructions.empty()) {
495507
// Print before CFG was built.
508+
uint64_t PrevOffset = 0;
496509
for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
497510
Offset = II.first;
498511

512+
// Print any constant islands in between the instructions.
513+
printConstantIslandInRange(PrevOffset, Offset);
514+
499515
// Print label if exists at this offset.
500516
auto LI = Labels.find(Offset);
501517
if (LI != Labels.end()) {
@@ -506,7 +522,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
506522
}
507523

508524
BC.printInstruction(OS, II.second, Offset, this);
525+
526+
PrevOffset = Offset;
509527
}
528+
529+
// Print any data at the end of the function.
530+
printConstantIslandInRange(PrevOffset, getMaxSize());
510531
}
511532

512533
StringRef SplitPointMsg = "";
@@ -1048,6 +1069,18 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
10481069
return getSize() - Offset;
10491070
}
10501071

1072+
std::optional<uint64_t>
1073+
BinaryFunction::getIslandInRange(uint64_t StartOffset, uint64_t EndOffset) const {
1074+
if (!Islands)
1075+
return std::nullopt;
1076+
1077+
auto Iter = llvm::lower_bound(Islands->DataOffsets, StartOffset);
1078+
if (Iter != Islands->DataOffsets.end() && *Iter < EndOffset)
1079+
return *Iter;
1080+
1081+
return std::nullopt;
1082+
}
1083+
10511084
bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
10521085
ArrayRef<uint8_t> FunctionData = *getData();
10531086
uint64_t EndOfCode = getSize();

bolt/test/AArch64/data-in-code.s

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
## Check that llvm-bolt prints data embedded in code.
2+
3+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
4+
# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
5+
# RUN: -fuse-ld=lld -Wl,-q
6+
7+
## Check disassembly of BOLT input.
8+
# RUN: llvm-objdump %t.exe -d | FileCheck %s
9+
10+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s
11+
12+
.text
13+
.balign 4
14+
15+
.global _start
16+
.type _start, %function
17+
_start:
18+
mov x0, #0x0
19+
.word 0x4f82e010
20+
ret
21+
.byte 0x0, 0xff, 0x42
22+
# CHECK-LABEL: _start
23+
# CHECK: mov x0, #0x0
24+
# CHECK-NEXT: .word 0x4f82e010
25+
# CHECK-NEXT: ret
26+
# CHECK-NEXT: .short 0xff00
27+
# CHECK-NEXT: .byte 0x42
28+
.size _start, .-_start
29+
30+
## Force relocation mode.
31+
.reloc 0, R_AARCH64_NONE

0 commit comments

Comments
 (0)