Skip to content

[DWARFLinker] Adjust DW_AT_LLVM_stmt_sequence for rewritten line tables #128953

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,13 @@ class DwarfEmitter {
const AddressRanges &LinkedRanges) = 0;

/// Emit specified \p LineTable into .debug_line table.
virtual void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
const CompileUnit &Unit,
OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool) = 0;
/// The optional parameter RowOffsets, if provided, will be populated with the
/// offsets of each line table row in the output .debug_line section.
virtual void
emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
const CompileUnit &Unit, OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool,
std::vector<uint64_t> *RowOffsets = nullptr) = 0;

/// Emit the .debug_pubnames contribution for \p Unit.
virtual void emitPubNamesForUnit(const CompileUnit &Unit) = 0;
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct PatchLocation {

using RngListAttributesTy = SmallVector<PatchLocation>;
using LocListAttributesTy = SmallVector<PatchLocation>;
using StmtSeqListAttributesTy = SmallVector<PatchLocation>;

/// Stores all information relating to a compile unit, be it in its original
/// instance in the object file to its brand new cloned and generated DIE tree.
Expand Down Expand Up @@ -175,6 +176,12 @@ class CompileUnit {
return LocationAttributes;
}

/// Provide read-only access to the list of DW_AT_LLVM_stmt_sequence
/// attributes that may need to be patched.
/// \returns a const reference to the patch locations recorded through
/// noteStmtSeqListAttribute().
const StmtSeqListAttributesTy &getStmtSeqListAttributes() const {
return StmtSeqListAttributes;
}

/// Mark every DIE in this unit as kept. This function also
/// marks variables as InDebugMap so that they appear in the
/// reconstructed accelerator tables.
Expand Down Expand Up @@ -210,6 +217,10 @@ class CompileUnit {
/// debug_loc section.
void noteLocationAttribute(PatchLocation Attr);

/// Record that the given DW_AT_LLVM_stmt_sequence attribute \p Attr may need
/// to be patched later.
void noteStmtSeqListAttribute(PatchLocation Attr);

/// Add a name accelerator entry for \a Die with \a Name.
void addNamespaceAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name);

Expand Down Expand Up @@ -309,6 +320,12 @@ class CompileUnit {
/// location expression.
LocListAttributesTy LocationAttributes;

/// List of DW_AT_LLVM_stmt_sequence attributes that may need to be patched
/// after the dwarf linker rewrites the line table. During line table rewrite
/// the line table format might change, so we have to patch any offsets that
/// reference its contents.
StmtSeqListAttributesTy StmtSeqListAttributes;

/// Accelerator entries for the unit, both for the pub*
/// sections and the apple* ones.
/// @{
Expand Down
14 changes: 9 additions & 5 deletions llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,13 @@ class DwarfStreamer : public DwarfEmitter {
}

/// Emit .debug_line table entry for specified \p LineTable
void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
const CompileUnit &Unit,
OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool) override;
/// The optional parameter RowOffsets, if provided, will be populated with the
/// offsets of each line table row in the output .debug_line section.
void
emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
const CompileUnit &Unit, OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool,
std::vector<uint64_t> *RowOffsets = nullptr) override;

uint64_t getLineSectionSize() const override { return LineSectionSize; }

Expand Down Expand Up @@ -266,7 +269,8 @@ class DwarfStreamer : public DwarfEmitter {
const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool);
void emitLineTableRows(const DWARFDebugLine::LineTable &LineTable,
MCSymbol *LineEndSym, unsigned AddressByteSize);
MCSymbol *LineEndSym, unsigned AddressByteSize,
std::vector<uint64_t> *RowOffsets = nullptr);
void emitIntOffset(uint64_t Offset, dwarf::DwarfFormat Format,
uint64_t &SectionSize);
void emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
Expand Down
163 changes: 129 additions & 34 deletions llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,18 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
->sizeOf(Unit.getOrigUnit().getFormParams());
}

if (AttrSpec.Attr == dwarf::DW_AT_LLVM_stmt_sequence) {
// If needed, we'll patch this sec_offset later with the correct offset.
auto Patch = Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
dwarf::DW_FORM_sec_offset,
DIEInteger(*Val.getAsSectionOffset()));

// Record this patch location so that it can be fixed up later.
Unit.noteStmtSeqListAttribute(Patch);

return Unit.getOrigUnit().getFormParams().getDwarfOffsetByteSize();
}

if (LLVM_UNLIKELY(Linker.Options.Update)) {
if (auto OptionalValue = Val.getAsUnsignedConstant())
Value = *OptionalValue;
Expand Down Expand Up @@ -2081,29 +2093,43 @@ void DWARFLinker::DIECloner::emitDebugAddrSection(
Emitter->emitDwarfDebugAddrsFooter(Unit, EndLabel);
}

/// A helper struct to help keep track of the association between the input and
/// output rows during line table rewriting. This is used to patch
/// DW_AT_LLVM_stmt_sequence attributes, which reference a particular line table
/// row.
struct TrackedRow {
/// Copy of a line table row. Its address is relocated before the row is
/// appended to the sequence being built.
DWARFDebugLine::Row Row;
/// Index of the input row this entry was created from. A synthesized
/// end_sequence row is a copy of the preceding tracked row and therefore
/// carries that row's index as well.
size_t OriginalRowIndex;
/// Initialized to false; set to true on the first row of a sequence when that
/// sequence is inserted into the output rows.
/// NOTE(review): no reader of this flag is visible in this excerpt - confirm
/// that it is consumed somewhere.
bool isStartSeqInOutput;
};

/// Insert the new line info sequence \p Seq into the current
/// set of already linked line info \p Rows.
static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
std::vector<DWARFDebugLine::Row> &Rows) {
static void insertLineSequence(std::vector<TrackedRow> &Seq,
std::vector<TrackedRow> &Rows) {
if (Seq.empty())
return;

if (!Rows.empty() && Rows.back().Address < Seq.front().Address) {
// Mark the first row in Seq to indicate it is the start of a sequence
// in the output line table.
Seq.front().isStartSeqInOutput = true;

if (!Rows.empty() && Rows.back().Row.Address < Seq.front().Row.Address) {
llvm::append_range(Rows, Seq);
Seq.clear();
return;
}

object::SectionedAddress Front = Seq.front().Address;
object::SectionedAddress Front = Seq.front().Row.Address;
auto InsertPoint = partition_point(
Rows, [=](const DWARFDebugLine::Row &O) { return O.Address < Front; });
Rows, [=](const TrackedRow &O) { return O.Row.Address < Front; });

// FIXME: this only removes the unneeded end_sequence if the
// sequences have been inserted in order. Using a global sort like
// described in generateLineTableForUnit() and delaying the end_sequene
// described in generateLineTableForUnit() and delaying the end_sequence
// elimination to emitLineTableForUnit() we can get rid of all of them.
if (InsertPoint != Rows.end() && InsertPoint->Address == Front &&
InsertPoint->EndSequence) {
if (InsertPoint != Rows.end() && InsertPoint->Row.Address == Front &&
InsertPoint->Row.EndSequence) {
*InsertPoint = Seq.front();
Rows.insert(InsertPoint + 1, Seq.begin() + 1, Seq.end());
} else {
Expand Down Expand Up @@ -2171,75 +2197,144 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
LineTable.Rows.clear();

LineTable.Sequences = LT->Sequences;

Emitter->emitLineTableForUnit(LineTable, Unit, DebugStrPool,
DebugLineStrPool);
} else {
// This vector is the output line table.
std::vector<DWARFDebugLine::Row> NewRows;
NewRows.reserve(LT->Rows.size());
// Create TrackedRow objects for all input rows.
std::vector<TrackedRow> InputRows;
InputRows.reserve(LT->Rows.size());
for (size_t i = 0; i < LT->Rows.size(); i++)
InputRows.emplace_back(TrackedRow{LT->Rows[i], i, false});

// This vector is the output line table (still in TrackedRow form).
std::vector<TrackedRow> OutputRows;
OutputRows.reserve(InputRows.size());

// Current sequence of rows being extracted, before being inserted
// in NewRows.
std::vector<DWARFDebugLine::Row> Seq;
// in OutputRows.
std::vector<TrackedRow> Seq;
Seq.reserve(InputRows.size());

const auto &FunctionRanges = Unit.getFunctionRanges();
std::optional<AddressRangeValuePair> CurrRange;

// FIXME: This logic is meant to generate exactly the same output as
// Darwin's classic dsymutil. There is a nicer way to implement this
// by simply putting all the relocated line info in NewRows and simply
// sorting NewRows before passing it to emitLineTableForUnit. This
// by simply putting all the relocated line info in OutputRows and simply
// sorting OutputRows before passing it to emitLineTableForUnit. This
// should be correct as sequences for a function should stay
// together in the sorted output. There are a few corner cases that
// look suspicious though, and that required to implement the logic
// this way. Revisit that once initial validation is finished.

// Iterate over the object file line info and extract the sequences
// that correspond to linked functions.
for (DWARFDebugLine::Row Row : LT->Rows) {
for (size_t i = 0; i < InputRows.size(); i++) {
TrackedRow TR = InputRows[i];

// Check whether we stepped out of the range. The range is
// half-open, but consider accept the end address of the range if
// half-open, but consider accepting the end address of the range if
// it is marked as end_sequence in the input (because in that
// case, the relocation offset is accurate and that entry won't
// serve as the start of another function).
if (!CurrRange || !CurrRange->Range.contains(Row.Address.Address)) {
// We just stepped out of a known range. Insert a end_sequence
if (!CurrRange || !CurrRange->Range.contains(TR.Row.Address.Address)) {
// We just stepped out of a known range. Insert an end_sequence
// corresponding to the end of the range.
uint64_t StopAddress =
CurrRange ? CurrRange->Range.end() + CurrRange->Value : -1ULL;
CurrRange = FunctionRanges.getRangeThatContains(Row.Address.Address);
CurrRange =
FunctionRanges.getRangeThatContains(TR.Row.Address.Address);
if (StopAddress != -1ULL && !Seq.empty()) {
// Insert end sequence row with the computed end address, but
// the same line as the previous one.
auto NextLine = Seq.back();
NextLine.Address.Address = StopAddress;
NextLine.EndSequence = 1;
NextLine.PrologueEnd = 0;
NextLine.BasicBlock = 0;
NextLine.EpilogueBegin = 0;
NextLine.Row.Address.Address = StopAddress;
NextLine.Row.EndSequence = 1;
NextLine.Row.PrologueEnd = 0;
NextLine.Row.BasicBlock = 0;
NextLine.Row.EpilogueBegin = 0;
Seq.push_back(NextLine);
insertLineSequence(Seq, NewRows);
insertLineSequence(Seq, OutputRows);
}

if (!CurrRange)
continue;
}

// Ignore empty sequences.
if (Row.EndSequence && Seq.empty())
if (TR.Row.EndSequence && Seq.empty())
continue;

// Relocate row address and add it to the current sequence.
Row.Address.Address += CurrRange->Value;
Seq.emplace_back(Row);
TR.Row.Address.Address += CurrRange->Value;
Seq.push_back(TR);

if (Row.EndSequence)
insertLineSequence(Seq, NewRows);
if (TR.Row.EndSequence)
insertLineSequence(Seq, OutputRows);
}

LineTable.Rows = std::move(NewRows);
// Materialize the tracked rows into final DWARFDebugLine::Row objects.
LineTable.Rows.clear();
LineTable.Rows.reserve(OutputRows.size());
for (auto &TR : OutputRows)
LineTable.Rows.push_back(TR.Row);

// Use OutputRowOffsets to store the offsets of each line table row in the
// output .debug_line section.
std::vector<uint64_t> OutputRowOffsets;

// The unit might not have any DW_AT_LLVM_stmt_sequence attributes, so use
// hasStmtSeq to skip the patching logic.
bool hasStmtSeq = Unit.getStmtSeqListAttributes().size() > 0;
Emitter->emitLineTableForUnit(LineTable, Unit, DebugStrPool,
DebugLineStrPool,
hasStmtSeq ? &OutputRowOffsets : nullptr);

if (hasStmtSeq) {
assert(OutputRowOffsets.size() == OutputRows.size() &&
"must have an offset for each row");

// Create a map of stmt sequence offsets to original row indices.
DenseMap<uint64_t, unsigned> SeqOffToOrigRow;
for (const DWARFDebugLine::Sequence &Seq : LT->Sequences)
SeqOffToOrigRow[Seq.StmtSeqOffset] = Seq.FirstRowIndex;

// Create a map of original row indices to new row indices.
DenseMap<size_t, size_t> OrigRowToNewRow;
for (size_t i = 0; i < OutputRows.size(); ++i)
OrigRowToNewRow[OutputRows[i].OriginalRowIndex] = i;

// Patch DW_AT_LLVM_stmt_sequence attributes in the compile unit DIE
// with the correct offset into the .debug_line section.
for (const auto &StmtSeq : Unit.getStmtSeqListAttributes()) {
uint64_t OrigStmtSeq = StmtSeq.get();
// 1. Get the original row index from the stmt sequence offset.
auto OrigRowIter = SeqOffToOrigRow.find(OrigStmtSeq);
assert(OrigRowIter != SeqOffToOrigRow.end() &&
"Stmt list offset not found in sequence offsets map");
size_t OrigRowIndex = OrigRowIter->second;

// 2. Get the new row index from the original row index.
auto NewRowIter = OrigRowToNewRow.find(OrigRowIndex);
if (NewRowIter == OrigRowToNewRow.end()) {
// If the original row index is not found in the map, update the
// stmt_sequence attribute to the 'invalid offset' magic value.
StmtSeq.set(UINT64_MAX);
continue;
}

// 3. Get the offset of the new row in the output .debug_line section.
assert(NewRowIter->second < OutputRowOffsets.size() &&
"New row index out of bounds");
uint64_t NewStmtSeqOffset = OutputRowOffsets[NewRowIter->second];

// 4. Patch the stmt_sequence attribute with the new offset.
StmtSeq.set(NewStmtSeqOffset);
}
}
}

Emitter->emitLineTableForUnit(LineTable, Unit, DebugStrPool,
DebugLineStrPool);
} else
Linker.reportWarning("Cann't load line table.", ObjFile);
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/DWARFLinker/Classic/DWARFLinkerCompileUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ void CompileUnit::noteLocationAttribute(PatchLocation Attr) {
LocationAttributes.emplace_back(Attr);
}

/// Append \p Attr to the list of DW_AT_LLVM_stmt_sequence patch locations
/// kept for this unit.
void CompileUnit::noteStmtSeqListAttribute(PatchLocation Attr) {
StmtSeqListAttributes.emplace_back(Attr);
}

void CompileUnit::addNamespaceAccelerator(const DIE *Die,
DwarfStringPoolEntryRef Name) {
Namespaces.emplace_back(Name, Die);
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,8 @@ void DwarfStreamer::emitDwarfDebugLocListsTableFragment(

void DwarfStreamer::emitLineTableForUnit(
const DWARFDebugLine::LineTable &LineTable, const CompileUnit &Unit,
OffsetsStringPool &DebugStrPool, OffsetsStringPool &DebugLineStrPool) {
OffsetsStringPool &DebugStrPool, OffsetsStringPool &DebugLineStrPool,
std::vector<uint64_t> *RowOffsets) {
// Switch to the section where the table will be emitted into.
MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection());

Expand All @@ -830,7 +831,7 @@ void DwarfStreamer::emitLineTableForUnit(

// Emit rows.
emitLineTableRows(LineTable, LineEndSym,
Unit.getOrigUnit().getAddressByteSize());
Unit.getOrigUnit().getAddressByteSize(), RowOffsets);
}

void DwarfStreamer::emitLineTablePrologue(const DWARFDebugLine::Prologue &P,
Expand Down Expand Up @@ -1036,7 +1037,7 @@ void DwarfStreamer::emitLineTableProloguePayload(

void DwarfStreamer::emitLineTableRows(
const DWARFDebugLine::LineTable &LineTable, MCSymbol *LineEndSym,
unsigned AddressByteSize) {
unsigned AddressByteSize, std::vector<uint64_t> *RowOffsets) {

MCDwarfLineTableParams Params;
Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
Expand Down Expand Up @@ -1068,6 +1069,11 @@ void DwarfStreamer::emitLineTableRows(
unsigned RowsSinceLastSequence = 0;

for (const DWARFDebugLine::Row &Row : LineTable.Rows) {
// If we're tracking row offsets, record the current section size as the
// offset of this row.
if (RowOffsets)
RowOffsets->push_back(LineSectionSize);

int64_t AddressDelta;
if (Address == -1ULL) {
MS->emitIntValue(dwarf::DW_LNS_extended_op, 1);
Expand Down
Loading
Loading