Skip to content

[DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups #123391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ class DWARFDebugLine {
unsigned LastRowIndex;
bool Empty;

/// The offset into the line table where this sequence begins
uint64_t StmtSeqOffset = UINT64_MAX;

void reset();

static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
Expand Down Expand Up @@ -243,8 +246,20 @@ class DWARFDebugLine {
uint32_t lookupAddress(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;

bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const;
/// Fills the Result argument with the indices of the rows that correspond
/// to the address range specified by \p Address and \p Size.
///
/// \param Address - The starting address of the range.
/// \param Size - The size of the address range.
/// \param Result - The vector to fill with row indices.
/// \param StmtSequenceOffset - if provided, only rows from the sequence
/// starting at the matching offset will be added to the result.
///
/// Returns true if any rows were found.
bool lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;

bool hasFileAtIndex(uint64_t FileIndex) const {
return Prologue.hasFileAtIndex(FileIndex);
Expand Down Expand Up @@ -305,8 +320,20 @@ class DWARFDebugLine {
uint32_t lookupAddressImpl(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;

bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const;
/// Fills the Result argument with the indices of the rows that correspond
/// to the address range specified by \p Address and \p Size.
///
/// \param Address - The starting address of the range.
/// \param Size - The size of the address range.
/// \param Result - The vector to fill with row indices.
/// \param StmtSequenceOffset - if provided, only rows from the sequence
/// starting at the matching offset will be added to the result.
///
/// Returns true if any rows were found.
bool
lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const;
};

const LineTable *getLineTable(uint64_t Offset) const;
Expand Down Expand Up @@ -376,7 +403,7 @@ class DWARFDebugLine {
ParsingState(struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler);

void resetRowAndSequence();
void resetRowAndSequence(uint64_t Offset);
void appendRowToMatrix();

struct AddrOpIndexDelta {
Expand Down
119 changes: 85 additions & 34 deletions llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
FirstRowIndex = 0;
LastRowIndex = 0;
Empty = true;
StmtSeqOffset = UINT64_MAX;
}

DWARFDebugLine::LineTable::LineTable() { clear(); }
Expand Down Expand Up @@ -561,13 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
DWARFDebugLine::ParsingState::ParsingState(
struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler)
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
resetRowAndSequence();
}
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}

void DWARFDebugLine::ParsingState::resetRowAndSequence() {
void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
Row.reset(LineTable->Prologue.DefaultIsStmt);
Sequence.reset();
Sequence.StmtSeqOffset = Offset;
}

void DWARFDebugLine::ParsingState::appendRowToMatrix() {
Expand Down Expand Up @@ -848,6 +848,10 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
// *OffsetPtr points to the end of the prologue - i.e. the start of the first
// sequence. So initialize the first sequence offset accordingly.
State.resetRowAndSequence(*OffsetPtr);

bool TombstonedAddress = false;
auto EmitRow = [&] {
if (!TombstonedAddress) {
Expand Down Expand Up @@ -912,7 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
// into this code path - if it were invalid, the default case would be
// followed.
EmitRow();
State.resetRowAndSequence();
// Cursor now points to right after the end_sequence opcode - so points
// to the start of the next sequence - if one exists.
State.resetRowAndSequence(Cursor.tell());
break;

case DW_LNE_set_address:
Expand Down Expand Up @@ -1364,66 +1370,111 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,

bool DWARFDebugLine::LineTable::lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const {
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const {

// Search for relocatable addresses
if (lookupAddressRangeImpl(Address, Size, Result))
if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
return true;

if (Address.SectionIndex == object::SectionedAddress::UndefSection)
return false;

// Search for absolute addresses
Address.SectionIndex = object::SectionedAddress::UndefSection;
return lookupAddressRangeImpl(Address, Size, Result);
return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
}

bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const {
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
uint64_t EndAddr = Address.Address + Size;
// First, find an instruction sequence containing the given address.
DWARFDebugLine::Sequence Sequence;
Sequence.SectionIndex = Address.SectionIndex;
Sequence.HighPC = Address.Address;
SequenceIter LastSeq = Sequences.end();
SequenceIter SeqPos = llvm::upper_bound(
Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
return false;

SequenceIter StartPos = SeqPos;
const uint64_t EndAddr = Address.Address + Size;

// Helper: Given a sequence and a starting address, find the subset
// of rows within [Address, EndAddr) and append them to `Result`.
auto addRowsFromSequence = [&](const Sequence &Seq,
object::SectionedAddress StartAddr,
uint64_t EndAddress, bool IsFirstSequence) {
// If this sequence does not intersect our [StartAddr, EndAddress) range,
// do nothing.
if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
return;

// Add the rows from the first sequence to the vector, starting with the
// index we just calculated
// For the "first" sequence in the search, we must figure out which row
// actually starts within our address range.
uint32_t FirstRowIndex = Seq.FirstRowIndex;
if (IsFirstSequence)
FirstRowIndex = findRowInSeq(Seq, StartAddr);

while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
// For the first sequence, we need to find which row in the sequence is the
// first in our range.
uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
if (SeqPos == StartPos)
FirstRowIndex = findRowInSeq(CurSeq, Address);

// Figure out the last row in the range.
// Similarly, compute the last row that is within [StartAddr, EndAddress).
uint32_t LastRowIndex =
findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
if (LastRowIndex == UnknownRowIndex)
LastRowIndex = CurSeq.LastRowIndex - 1;
LastRowIndex = Seq.LastRowIndex - 1;

assert(FirstRowIndex != UnknownRowIndex);
assert(LastRowIndex != UnknownRowIndex);

// Append all row indices in [FirstRowIndex, LastRowIndex].
for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
Result.push_back(I);
}
};

// If a stmt_sequence_offset was provided, only the single sequence that
// starts at that offset is eligible. Find it and add only its relevant rows.
if (StmtSequenceOffset) {
  // Sequences is kept in the order required by the upper_bound/orderByHighPC
  // search used in the address-based path below, i.e. it is ordered by
  // HighPC and not by StmtSeqOffset. A binary search keyed on the offset
  // would therefore violate its precondition and could miss a valid
  // sequence, so scan linearly for the matching offset instead.
  const auto It = llvm::find_if(Sequences, [&](const Sequence &Seq) {
    return Seq.StmtSeqOffset == *StmtSequenceOffset;
  });

  // No sequence starts at the requested offset: there is no match to return.
  if (It == Sequences.end())
    return false;

  // Now add rows from the discovered sequence if it intersects [Address,
  // EndAddr).
  // NOTE(review): IsFirstSequence=true makes the helper look up the row for
  // Address inside this sequence; confirm that is well-defined when Address
  // lies before the sequence's LowPC but the range still intersects it.
  addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
  return !Result.empty();
}

// Otherwise, fall back to logic of walking sequences by Address.
// We first find a sequence containing `Address` (via upper_bound on HighPC),
// then proceed forward through overlapping sequences in ascending order.

// Construct a dummy Sequence to find where `Address` fits by HighPC.
DWARFDebugLine::Sequence SearchSeq;
SearchSeq.SectionIndex = Address.SectionIndex;
SearchSeq.HighPC = Address.Address;

auto LastSeq = Sequences.end();
auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
DWARFDebugLine::Sequence::orderByHighPC);
if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
return false;

// This marks the first sequence we found that might contain Address.
const auto StartPos = SeqPos;

// Walk forward until sequences no longer intersect [Address, EndAddr).
while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
// Add rows only if the sequence is in the same section:
if (SeqPos->SectionIndex == Address.SectionIndex) {
// For the very first sequence, we need the row that lines up with
// `Address`.
bool IsFirst = (SeqPos == StartPos);
addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst);
}
++SeqPos;
}

return true;
return !Result.empty();
}

std::optional<StringRef>
Expand Down
115 changes: 114 additions & 1 deletion llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "DwarfGenerator.h"
#include "DwarfUtils.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
Expand Down Expand Up @@ -2035,4 +2035,117 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
}

/// Test that lookupAddressRange correctly filters rows based on
/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence).
///
/// This test verifies that:
/// 1. When a statement-sequence offset is provided, lookupAddressRange
/// only returns rows from the sequence starting at that offset.
/// 2. When an invalid statement-sequence offset is provided, no rows
/// are returned.
/// 3. When no statement-sequence offset is provided, all matching rows
/// in the table are returned.
///
/// We build a line table with two sequences at the same address range
/// but different line numbers. Then we try lookups with various statement-
/// sequence offsets to check the filtering logic.
TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
if (!setupGenerator())
GTEST_SKIP();

// Create a line table that has two sequences covering [0x1000, 0x1004).
// Each sequence emits a row at 0x1000 and a row at 0x1004, and ending the
// sequence emits one more row. Each sequence therefore contributes three
// rows to the table: indices 0-2 for the first sequence and 3-5 for the
// second (the expected index 3 in the checks below relies on this).
// The sequences differ by line numbers (100 vs. 200, etc.).
//
// We'll pretend the first sequence starts at offset 0x2e in the line table,
// the second at 0x42, and we'll also test an invalid offset 0x66.

LineTable &LT = Gen->addLineTable();

// First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101)
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
// Advance the line register by 99 (so line=100) and copy.
LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
LT.addStandardOpcode(DW_LNS_copy, {});
// 0x4b is a special opcode: address += 4, line += 1 (so line=101).
LT.addByte(0x4b);
// End this sequence.
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});

// Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201)
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
LT.addStandardOpcode(DW_LNS_copy, {});
// 0x4b again: address += 4, line += 1 (so line=201).
LT.addByte(0x4b);
// End this second sequence.
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});

// Generate the DWARF data.
generate();

// Parse the line table.
auto ExpectedLineTable =
Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context,
/*DwarfUnit=*/nullptr, RecordRecoverable);
ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
const auto *Table = *ExpectedLineTable;

// The table should have two sequences, each starting at our chosen offsets.
ASSERT_EQ(Table->Sequences.size(), 2u);

// 1) Try looking up with an invalid offset (simulating an invalid
// DW_AT_LLVM_stmt_sequence). We expect no rows.
{
std::vector<uint32_t> Rows;
bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
/*StmtSequenceOffset=*/0x66); // invalid offset
EXPECT_FALSE(Found);
EXPECT_TRUE(Rows.empty());
}

// 2) Look up using the offset 0x2e (our first sequence). We expect
// to get the rows from that sequence only (which for 0x1000 is row #0).
{
std::vector<uint32_t> Rows;
bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
/*StmtSequenceOffset=*/0x2e);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
// The first sequence's first row is index 0.
EXPECT_EQ(Rows[0], 0u);
}

// 3) Look up using the offset 0x42 (second sequence). For address 0x1000
// in that second sequence, we should see row #3: rows 0-2 belong to the
// first sequence.
{
std::vector<uint32_t> Rows;
bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
/*StmtSequenceOffset=*/0x42);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
// The second sequence's first row is index 3 in the table.
EXPECT_EQ(Rows[0], 3u);
}

// 4) Look up with no statement-sequence offset specified.
// We should get rows from both sequences for address 0x1000.
{
std::vector<uint32_t> Rows;
bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
std::nullopt /* no filter */);
EXPECT_TRUE(Found);
// The first sequence's row is #0, second's row is #3, so both should
// appear.
ASSERT_EQ(Rows.size(), 2u);
EXPECT_EQ(Rows[0], 0u);
EXPECT_EQ(Rows[1], 3u);
}
}
} // end anonymous namespace