Skip to content

Commit 6e26246

Browse files
authored
[BOLT][DWARF] Refactor address ranges processing (#71225)
Create BinaryFunction::translateInputToOutputRange() and use it for updating DWARF debug ranges and location lists while de-duplicating the existing code. Additionally, move DWARF-specific code out of BinaryFunction and add print functions to facilitate debugging. Note that this change is deliberately kept "bug-level" compatible with the existing solution to keep it NFCI and make it easier to track any possible regressions in future updates to the ranges-handling code.
1 parent 5aa2c65 commit 6e26246

File tree

4 files changed

+182
-168
lines changed

4 files changed

+182
-168
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2309,15 +2309,10 @@ class BinaryFunction {
23092309
/// removed.
23102310
uint64_t translateInputToOutputAddress(uint64_t Address) const;
23112311

2312-
/// Take address ranges corresponding to the input binary and translate
2313-
/// them to address ranges in the output binary.
2314-
DebugAddressRangesVector translateInputToOutputRanges(
2315-
const DWARFAddressRangesVector &InputRanges) const;
2316-
2317-
/// Similar to translateInputToOutputRanges() but operates on location lists
2318-
/// and moves associated data to output location lists.
2319-
DebugLocationsVector
2320-
translateInputToOutputLocationList(const DebugLocationsVector &InputLL) const;
2312+
/// Translate a contiguous range of addresses in the input binary into a set
2313+
/// of ranges in the output binary.
2314+
DebugAddressRangesVector
2315+
translateInputToOutputRange(DebugAddressRange InRange) const;
23212316

23222317
/// Return true if the function is an AArch64 linker inserted veneer
23232318
bool isAArch64Veneer() const;

bolt/include/bolt/Core/DebugData.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/CodeGen/DIE.h"
1919
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
2020
#include "llvm/MC/MCDwarf.h"
21+
#include "llvm/Support/FormatVariadic.h"
2122
#include "llvm/Support/SMLoc.h"
2223
#include "llvm/Support/raw_ostream.h"
2324
#include <cstdint>
@@ -95,6 +96,12 @@ static inline bool operator<(const DebugAddressRange &LHS,
9596
return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC);
9697
}
9798

99+
inline raw_ostream &operator<<(raw_ostream &OS,
100+
const DebugAddressRange &Range) {
101+
OS << formatv("[{0:x}, {1:x})", Range.LowPC, Range.HighPC);
102+
return OS;
103+
}
104+
98105
/// DebugAddressRangesVector - represents a set of absolute address ranges.
99106
using DebugAddressRangesVector = SmallVector<DebugAddressRange, 2>;
100107

@@ -106,6 +113,18 @@ struct DebugLocationEntry {
106113
SmallVector<uint8_t, 4> Expr;
107114
};
108115

116+
inline raw_ostream &operator<<(raw_ostream &OS,
117+
const DebugLocationEntry &Entry) {
118+
OS << formatv("[{0:x}, {1:x}) : [", Entry.LowPC, Entry.HighPC);
119+
const char *Sep = "";
120+
for (unsigned Byte : Entry.Expr) {
121+
OS << Sep << Byte;
122+
Sep = ", ";
123+
}
124+
OS << "]";
125+
return OS;
126+
}
127+
109128
using DebugLocationsVector = SmallVector<DebugLocationEntry, 4>;
110129

111130
/// References a row in a DWARFDebugLine::LineTable by the DWARF

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 67 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -4251,92 +4251,88 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
42514251
BB->getOutputAddressRange().second);
42524252
}
42534253

4254-
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
4255-
const DWARFAddressRangesVector &InputRanges) const {
4256-
DebugAddressRangesVector OutputRanges;
4254+
DebugAddressRangesVector
4255+
BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
4256+
DebugAddressRangesVector OutRanges;
42574257

4258+
// The function was removed from the output. Return an empty range.
42584259
if (isFolded())
4259-
return OutputRanges;
4260+
return OutRanges;
42604261

4261-
// If the function hasn't changed return the same ranges.
4262+
// If the function hasn't changed, return the same range.
42624263
if (!isEmitted()) {
4263-
OutputRanges.resize(InputRanges.size());
4264-
llvm::transform(InputRanges, OutputRanges.begin(),
4265-
[](const DWARFAddressRange &Range) {
4266-
return DebugAddressRange(Range.LowPC, Range.HighPC);
4267-
});
4268-
return OutputRanges;
4264+
OutRanges.emplace_back(InRange);
4265+
return OutRanges;
4266+
}
4267+
4268+
if (!containsAddress(InRange.LowPC))
4269+
return OutRanges;
4270+
4271+
// Special case of an empty range [X, X). Some tools expect X to be updated.
4272+
if (InRange.LowPC == InRange.HighPC) {
4273+
if (uint64_t NewPC = translateInputToOutputAddress(InRange.LowPC))
4274+
OutRanges.push_back(DebugAddressRange{NewPC, NewPC});
4275+
return OutRanges;
42694276
}
42704277

4271-
// Even though we will merge ranges in a post-processing pass, we attempt to
4272-
// merge them in a main processing loop as it improves the processing time.
4273-
uint64_t PrevEndAddress = 0;
4274-
for (const DWARFAddressRange &Range : InputRanges) {
4275-
if (!containsAddress(Range.LowPC)) {
4278+
uint64_t InputOffset = InRange.LowPC - getAddress();
4279+
const uint64_t InputEndOffset =
4280+
std::min(InRange.HighPC - getAddress(), getSize());
4281+
4282+
auto BBI = llvm::upper_bound(BasicBlockOffsets,
4283+
BasicBlockOffset(InputOffset, nullptr),
4284+
CompareBasicBlockOffsets());
4285+
assert(BBI != BasicBlockOffsets.begin());
4286+
4287+
// Iterate over blocks in the input order using BasicBlockOffsets.
4288+
for (--BBI; InputOffset < InputEndOffset && BBI != BasicBlockOffsets.end();
4289+
InputOffset = BBI->second->getEndOffset(), ++BBI) {
4290+
const BinaryBasicBlock &BB = *BBI->second;
4291+
if (InputOffset < BB.getOffset() || InputOffset >= BB.getEndOffset()) {
42764292
LLVM_DEBUG(
42774293
dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
4278-
<< *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
4279-
<< Twine::utohexstr(Range.HighPC) << "]\n");
4280-
PrevEndAddress = 0;
4281-
continue;
4294+
<< *this << " : [0x" << Twine::utohexstr(InRange.LowPC)
4295+
<< ", 0x" << Twine::utohexstr(InRange.HighPC) << "]\n");
4296+
break;
42824297
}
4283-
uint64_t InputOffset = Range.LowPC - getAddress();
4284-
const uint64_t InputEndOffset =
4285-
std::min(Range.HighPC - getAddress(), getSize());
4286-
4287-
auto BBI = llvm::upper_bound(BasicBlockOffsets,
4288-
BasicBlockOffset(InputOffset, nullptr),
4289-
CompareBasicBlockOffsets());
4290-
--BBI;
4291-
do {
4292-
const BinaryBasicBlock *BB = BBI->second;
4293-
if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
4294-
LLVM_DEBUG(
4295-
dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
4296-
<< *this << " : [0x" << Twine::utohexstr(Range.LowPC)
4297-
<< ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
4298-
PrevEndAddress = 0;
4299-
break;
4300-
}
43014298

4302-
// Skip the range if the block was deleted.
4303-
if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
4304-
const uint64_t StartAddress =
4305-
OutputStart + InputOffset - BB->getOffset();
4306-
uint64_t EndAddress = BB->getOutputAddressRange().second;
4307-
if (InputEndOffset < BB->getEndOffset())
4308-
EndAddress = StartAddress + InputEndOffset - InputOffset;
4309-
4310-
if (StartAddress == PrevEndAddress) {
4311-
OutputRanges.back().HighPC =
4312-
std::max(OutputRanges.back().HighPC, EndAddress);
4313-
} else {
4314-
OutputRanges.emplace_back(StartAddress,
4315-
std::max(StartAddress, EndAddress));
4316-
}
4317-
PrevEndAddress = OutputRanges.back().HighPC;
4318-
}
4299+
// Skip the block if it wasn't emitted.
4300+
if (!BB.getOutputAddressRange().first)
4301+
continue;
43194302

4320-
InputOffset = BB->getEndOffset();
4321-
++BBI;
4322-
} while (InputOffset < InputEndOffset);
4323-
}
4303+
// Find output address for an instruction with an offset greater or equal
4304+
// to \p Offset. The output address should fall within the same basic
4305+
// block boundaries.
4306+
auto translateBlockOffset = [&](const uint64_t Offset) {
4307+
const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
4308+
return OutAddress;
4309+
};
43244310

4325-
// Post-processing pass to sort and merge ranges.
4326-
llvm::sort(OutputRanges);
4327-
DebugAddressRangesVector MergedRanges;
4328-
PrevEndAddress = 0;
4329-
for (const DebugAddressRange &Range : OutputRanges) {
4330-
if (Range.LowPC <= PrevEndAddress) {
4331-
MergedRanges.back().HighPC =
4332-
std::max(MergedRanges.back().HighPC, Range.HighPC);
4333-
} else {
4334-
MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
4311+
uint64_t OutLowPC = BB.getOutputAddressRange().first;
4312+
if (InputOffset > BB.getOffset())
4313+
OutLowPC = translateBlockOffset(InputOffset - BB.getOffset());
4314+
4315+
uint64_t OutHighPC = BB.getOutputAddressRange().second;
4316+
if (InputEndOffset < BB.getEndOffset()) {
4317+
assert(InputEndOffset >= BB.getOffset());
4318+
OutHighPC = translateBlockOffset(InputEndOffset - BB.getOffset());
43354319
}
4336-
PrevEndAddress = MergedRanges.back().HighPC;
4320+
4321+
// Check if we can expand the last translated range.
4322+
if (!OutRanges.empty() && OutRanges.back().HighPC == OutLowPC)
4323+
OutRanges.back().HighPC = std::max(OutRanges.back().HighPC, OutHighPC);
4324+
else
4325+
OutRanges.emplace_back(OutLowPC, std::max(OutLowPC, OutHighPC));
43374326
}
43384327

4339-
return MergedRanges;
4328+
LLVM_DEBUG({
4329+
dbgs() << "BOLT-DEBUG: translated address range " << InRange << " -> ";
4330+
for (const DebugAddressRange &R : OutRanges)
4331+
dbgs() << R << ' ';
4332+
dbgs() << '\n';
4333+
});
4334+
4335+
return OutRanges;
43404336
}
43414337

43424338
MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
@@ -4367,92 +4363,6 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
43674363
}
43684364
}
43694365

4370-
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
4371-
const DebugLocationsVector &InputLL) const {
4372-
DebugLocationsVector OutputLL;
4373-
4374-
if (isFolded())
4375-
return OutputLL;
4376-
4377-
// If the function hasn't changed - there's nothing to update.
4378-
if (!isEmitted())
4379-
return InputLL;
4380-
4381-
uint64_t PrevEndAddress = 0;
4382-
SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
4383-
for (const DebugLocationEntry &Entry : InputLL) {
4384-
const uint64_t Start = Entry.LowPC;
4385-
const uint64_t End = Entry.HighPC;
4386-
if (!containsAddress(Start)) {
4387-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
4388-
"for "
4389-
<< *this << " : [0x" << Twine::utohexstr(Start)
4390-
<< ", 0x" << Twine::utohexstr(End) << "]\n");
4391-
continue;
4392-
}
4393-
uint64_t InputOffset = Start - getAddress();
4394-
const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
4395-
auto BBI = llvm::upper_bound(BasicBlockOffsets,
4396-
BasicBlockOffset(InputOffset, nullptr),
4397-
CompareBasicBlockOffsets());
4398-
--BBI;
4399-
do {
4400-
const BinaryBasicBlock *BB = BBI->second;
4401-
if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
4402-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
4403-
"for "
4404-
<< *this << " : [0x" << Twine::utohexstr(Start)
4405-
<< ", 0x" << Twine::utohexstr(End) << "]\n");
4406-
PrevEndAddress = 0;
4407-
break;
4408-
}
4409-
4410-
// Skip the range if the block was deleted.
4411-
if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
4412-
const uint64_t StartAddress =
4413-
OutputStart + InputOffset - BB->getOffset();
4414-
uint64_t EndAddress = BB->getOutputAddressRange().second;
4415-
if (InputEndOffset < BB->getEndOffset())
4416-
EndAddress = StartAddress + InputEndOffset - InputOffset;
4417-
4418-
if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
4419-
OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
4420-
} else {
4421-
OutputLL.emplace_back(DebugLocationEntry{
4422-
StartAddress, std::max(StartAddress, EndAddress), Entry.Expr});
4423-
}
4424-
PrevEndAddress = OutputLL.back().HighPC;
4425-
PrevExpr = &OutputLL.back().Expr;
4426-
}
4427-
4428-
++BBI;
4429-
InputOffset = BB->getEndOffset();
4430-
} while (InputOffset < InputEndOffset);
4431-
}
4432-
4433-
// Sort and merge adjacent entries with identical location.
4434-
llvm::stable_sort(
4435-
OutputLL, [](const DebugLocationEntry &A, const DebugLocationEntry &B) {
4436-
return A.LowPC < B.LowPC;
4437-
});
4438-
DebugLocationsVector MergedLL;
4439-
PrevEndAddress = 0;
4440-
PrevExpr = nullptr;
4441-
for (const DebugLocationEntry &Entry : OutputLL) {
4442-
if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
4443-
MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
4444-
} else {
4445-
const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
4446-
const uint64_t End = std::max(Begin, Entry.HighPC);
4447-
MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
4448-
}
4449-
PrevEndAddress = MergedLL.back().HighPC;
4450-
PrevExpr = &MergedLL.back().Expr;
4451-
}
4452-
4453-
return MergedLL;
4454-
}
4455-
44564366
void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
44574367
if (!opts::shouldPrint(*this))
44584368
return;

0 commit comments

Comments
 (0)