Skip to content

Commit 464f65a

Browse files
authored
[DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (#123391)
**Summary** Add support for filtering line table entries based on the `DW_AT_LLVM_stmt_sequence` attribute when looking up address ranges. This ensures that line entries are correctly attributed to their corresponding functions, even when multiple functions share the same address range due to optimizations. **Background** In #110192 we added support to clang to generate the `DW_AT_LLVM_stmt_sequence` attribute for `DW_TAG_subprogram`s. Corresponding RFC: [New DWARF Attribute for Symbolication of Merged Functions](https://discourse.llvm.org/t/rfc-new-dwarf-attribute-for-symbolication-of-merged-functions/79434) The `DW_AT_LLVM_stmt_sequence` attribute allows accurate attribution of line number information to the corresponding functions, even in scenarios where functions are merged or share the same address space due to optimizations like Identical Code Folding (ICF) in the linker. **Implementation Details** The patch modifies `DWARFDebugLine::LineTable::lookupAddressRange` to accept an optional statement-sequence offset (`std::optional<uint64_t> StmtSequenceOffset`), intended to carry the value of a DIE's `DW_AT_LLVM_stmt_sequence` attribute. This attribute contains an offset into the line table that marks where the line entries for this DIE's function begin. If the offset is provided, the function filters the results to only include line entries from the sequence that starts at the specified offset. This ensures that even when multiple functions share the same address range, we return only the line entries that actually belong to the function represented by the DIE. The implementation: - Records the line table offset at which each sequence begins (`Sequence::StmtSeqOffset`) while parsing - Adds an optional `StmtSequenceOffset` parameter to lookupAddressRange - Modifies the address range lookup logic to filter sequences based on their offset - Returns only line entries from the matching sequence
1 parent a15618f commit 464f65a

File tree

3 files changed

+191
-21
lines changed

3 files changed

+191
-21
lines changed

llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ class DWARFDebugLine {
209209
unsigned LastRowIndex;
210210
bool Empty;
211211

212+
/// The offset into the line table where this sequence begins
213+
uint64_t StmtSeqOffset = UINT64_MAX;
214+
212215
void reset();
213216

214217
static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
@@ -243,8 +246,20 @@ class DWARFDebugLine {
243246
uint32_t lookupAddress(object::SectionedAddress Address,
244247
bool *IsApproximateLine = nullptr) const;
245248

246-
bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
247-
std::vector<uint32_t> &Result) const;
249+
/// Fills the Result argument with the indices of the rows that correspond
250+
/// to the address range specified by \p Address and \p Size.
251+
///
252+
/// \param Address - The starting address of the range.
253+
/// \param Size - The size of the address range.
254+
/// \param Result - The vector to fill with row indices.
255+
/// \param StmtSequenceOffset - if provided, only rows from the sequence
256+
/// starting at the matching offset will be added to the result.
257+
///
258+
/// Returns true if any rows were found.
259+
bool lookupAddressRange(
260+
object::SectionedAddress Address, uint64_t Size,
261+
std::vector<uint32_t> &Result,
262+
std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;
248263

249264
bool hasFileAtIndex(uint64_t FileIndex) const {
250265
return Prologue.hasFileAtIndex(FileIndex);
@@ -305,8 +320,20 @@ class DWARFDebugLine {
305320
uint32_t lookupAddressImpl(object::SectionedAddress Address,
306321
bool *IsApproximateLine = nullptr) const;
307322

308-
bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
309-
std::vector<uint32_t> &Result) const;
323+
/// Fills the Result argument with the indices of the rows that correspond
324+
/// to the address range specified by \p Address and \p Size.
325+
///
326+
/// \param Address - The starting address of the range.
327+
/// \param Size - The size of the address range.
328+
/// \param Result - The vector to fill with row indices.
329+
/// \param StmtSequenceOffset - if provided, only rows from the sequence
330+
/// starting at the matching offset will be added to the result.
331+
///
332+
/// Returns true if any rows were found.
333+
bool
334+
lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
335+
std::vector<uint32_t> &Result,
336+
std::optional<uint64_t> StmtSequenceOffset) const;
310337
};
311338

312339
const LineTable *getLineTable(uint64_t Offset) const;
@@ -376,7 +403,7 @@ class DWARFDebugLine {
376403
ParsingState(struct LineTable *LT, uint64_t TableOffset,
377404
function_ref<void(Error)> ErrorHandler);
378405

379-
void resetRowAndSequence();
406+
void resetRowAndSequence(uint64_t Offset);
380407
void appendRowToMatrix();
381408

382409
struct AddrOpIndexDelta {

llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
531531
FirstRowIndex = 0;
532532
LastRowIndex = 0;
533533
Empty = true;
534+
StmtSeqOffset = UINT64_MAX;
534535
}
535536

536537
DWARFDebugLine::LineTable::LineTable() { clear(); }
@@ -561,13 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
561562
DWARFDebugLine::ParsingState::ParsingState(
562563
struct LineTable *LT, uint64_t TableOffset,
563564
function_ref<void(Error)> ErrorHandler)
564-
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
565-
resetRowAndSequence();
566-
}
565+
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}
567566

568-
void DWARFDebugLine::ParsingState::resetRowAndSequence() {
567+
void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
569568
Row.reset(LineTable->Prologue.DefaultIsStmt);
570569
Sequence.reset();
570+
Sequence.StmtSeqOffset = Offset;
571571
}
572572

573573
void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -848,6 +848,10 @@ Error DWARFDebugLine::LineTable::parse(
848848
*OS << '\n';
849849
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
850850
}
851+
// *OffsetPtr points to the end of the prologue - i.e. the start of the first
852+
// sequence. So initialize the first sequence offset accordingly.
853+
State.resetRowAndSequence(*OffsetPtr);
854+
851855
bool TombstonedAddress = false;
852856
auto EmitRow = [&] {
853857
if (!TombstonedAddress) {
@@ -912,7 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
912916
// into this code path - if it were invalid, the default case would be
913917
// followed.
914918
EmitRow();
915-
State.resetRowAndSequence();
919+
// Cursor now points to right after the end_sequence opcode - so points
920+
// to the start of the next sequence - if one exists.
921+
State.resetRowAndSequence(Cursor.tell());
916922
break;
917923

918924
case DW_LNE_set_address:
@@ -1364,23 +1370,25 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
13641370

13651371
bool DWARFDebugLine::LineTable::lookupAddressRange(
13661372
object::SectionedAddress Address, uint64_t Size,
1367-
std::vector<uint32_t> &Result) const {
1373+
std::vector<uint32_t> &Result,
1374+
std::optional<uint64_t> StmtSequenceOffset) const {
13681375

13691376
// Search for relocatable addresses
1370-
if (lookupAddressRangeImpl(Address, Size, Result))
1377+
if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
13711378
return true;
13721379

13731380
if (Address.SectionIndex == object::SectionedAddress::UndefSection)
13741381
return false;
13751382

13761383
// Search for absolute addresses
13771384
Address.SectionIndex = object::SectionedAddress::UndefSection;
1378-
return lookupAddressRangeImpl(Address, Size, Result);
1385+
return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
13791386
}
13801387

13811388
bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
13821389
object::SectionedAddress Address, uint64_t Size,
1383-
std::vector<uint32_t> &Result) const {
1390+
std::vector<uint32_t> &Result,
1391+
std::optional<uint64_t> StmtSequenceOffset) const {
13841392
if (Sequences.empty())
13851393
return false;
13861394
uint64_t EndAddr = Address.Address + Size;
@@ -1389,16 +1397,38 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
13891397
Sequence.SectionIndex = Address.SectionIndex;
13901398
Sequence.HighPC = Address.Address;
13911399
SequenceIter LastSeq = Sequences.end();
1392-
SequenceIter SeqPos = llvm::upper_bound(
1393-
Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
1394-
if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
1400+
SequenceIter SeqPos;
1401+
1402+
if (StmtSequenceOffset) {
1403+
// If we have a statement sequence offset, find the specific sequence.
1404+
// Linear search for sequence with matching StmtSeqOffset
1405+
SeqPos = std::find_if(Sequences.begin(), LastSeq,
1406+
[&](const DWARFDebugLine::Sequence &S) {
1407+
return S.StmtSeqOffset == *StmtSequenceOffset;
1408+
});
1409+
1410+
// If sequence not found, return false
1411+
if (SeqPos == LastSeq)
1412+
return false;
1413+
1414+
// Set LastSeq to the next sequence since we only want the one matching
1415+
// sequence (sequences are guaranteed to have unique StmtSeqOffset)
1416+
LastSeq = SeqPos + 1;
1417+
} else {
1418+
// No specific sequence requested, find first sequence containing address
1419+
SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence,
1420+
DWARFDebugLine::Sequence::orderByHighPC);
1421+
if (SeqPos == LastSeq)
1422+
return false;
1423+
}
1424+
1425+
// If the start sequence doesn't contain the address, nothing to do
1426+
if (!SeqPos->containsPC(Address))
13951427
return false;
13961428

13971429
SequenceIter StartPos = SeqPos;
13981430

1399-
// Add the rows from the first sequence to the vector, starting with the
1400-
// index we just calculated
1401-
1431+
// Process sequences that overlap with the desired range
14021432
while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
14031433
const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
14041434
// For the first sequence, we need to find which row in the sequence is the

llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
910
#include "DwarfGenerator.h"
1011
#include "DwarfUtils.h"
1112
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
12-
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
1313
#include "llvm/Object/ObjectFile.h"
1414
#include "llvm/Testing/Support/Error.h"
1515
#include "gtest/gtest.h"
@@ -2035,4 +2035,117 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
20352035
EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
20362036
}
20372037

2038+
/// Test that lookupAddressRange correctly filters rows based on
2039+
/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence).
2040+
///
2041+
/// This test verifies that:
2042+
/// 1. When a statement-sequence offset is provided, lookupAddressRange
2043+
/// only returns rows from the sequence starting at that offset.
2044+
/// 2. When an invalid statement-sequence offset is provided, no rows
2045+
/// are returned.
2046+
/// 3. When no statement-sequence offset is provided, all matching rows
2047+
/// in the table are returned.
2048+
///
2049+
/// We build a line table with two sequences at the same address range
2050+
/// but different line numbers. Then we try lookups with various statement-
2051+
/// sequence offsets to check the filtering logic.
2052+
TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
2053+
if (!setupGenerator())
2054+
GTEST_SKIP();
2055+
2056+
// Create a line table that has two sequences covering [0x1000, 0x1004).
2057+
// Each sequence has rows at 0x1000 and 0x1004 plus an end_sequence row
2058+
// (three rows total); they differ only by line numbers (100 vs. 200, etc.).
2059+
//
2060+
// We'll pretend the first sequence starts at offset 0x2e in the line table,
2061+
// the second at 0x42, and we'll also test an invalid offset 0x66.
2062+
2063+
LineTable &LT = Gen->addLineTable();
2064+
2065+
// First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101)
2066+
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
2067+
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
2068+
// Advance the line register by 99 (so line=100) and copy.
2069+
LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
2070+
LT.addStandardOpcode(DW_LNS_copy, {});
2071+
// 0x4b is a special opcode: address += 4, line += 1 (so line=101).
2072+
LT.addByte(0x4b);
2073+
// End this sequence.
2074+
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
2075+
2076+
// Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201)
2077+
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
2078+
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
2079+
LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
2080+
LT.addStandardOpcode(DW_LNS_copy, {});
2081+
// 0x4b again: address += 4, line += 1 (so line=201).
2082+
LT.addByte(0x4b);
2083+
// End this second sequence.
2084+
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
2085+
2086+
// Generate the DWARF data.
2087+
generate();
2088+
2089+
// Parse the line table.
2090+
auto ExpectedLineTable =
2091+
Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context,
2092+
/*DwarfUnit=*/nullptr, RecordRecoverable);
2093+
ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
2094+
const auto *Table = *ExpectedLineTable;
2095+
2096+
// The table should have two sequences, each starting at our chosen offsets.
2097+
ASSERT_EQ(Table->Sequences.size(), 2u);
2098+
2099+
// 1) Try looking up with an invalid offset (simulating an invalid
2100+
// DW_AT_LLVM_stmt_sequence). We expect no rows.
2101+
{
2102+
std::vector<uint32_t> Rows;
2103+
bool Found = Table->lookupAddressRange(
2104+
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
2105+
/*StmtSequenceOffset=*/0x66); // invalid offset
2106+
EXPECT_FALSE(Found);
2107+
EXPECT_TRUE(Rows.empty());
2108+
}
2109+
2110+
// 2) Look up using the offset 0x2e (our first sequence). We expect
2111+
// to get the rows from that sequence only (which for 0x1000 is row #0).
2112+
{
2113+
std::vector<uint32_t> Rows;
2114+
bool Found = Table->lookupAddressRange(
2115+
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
2116+
/*StmtSequenceOffset=*/0x2e);
2117+
EXPECT_TRUE(Found);
2118+
ASSERT_EQ(Rows.size(), 1u);
2119+
// The first sequence's first row is index 0.
2120+
EXPECT_EQ(Rows[0], 0u);
2121+
}
2122+
2123+
// 3) Look up using the offset 0x42 (second sequence). For address 0x1000
2124+
// in that second sequence, we should see row #3.
2125+
{
2126+
std::vector<uint32_t> Rows;
2127+
bool Found = Table->lookupAddressRange(
2128+
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
2129+
/*StmtSequenceOffset=*/0x42);
2130+
EXPECT_TRUE(Found);
2131+
ASSERT_EQ(Rows.size(), 1u);
2132+
// The second sequence's first row is index 3 in the table.
2133+
EXPECT_EQ(Rows[0], 3u);
2134+
}
2135+
2136+
// 4) Look up with no statement-sequence offset specified.
2137+
// We should get rows from both sequences for address 0x1000.
2138+
{
2139+
std::vector<uint32_t> Rows;
2140+
bool Found = Table->lookupAddressRange(
2141+
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
2142+
std::nullopt /* no filter */);
2143+
EXPECT_TRUE(Found);
2144+
// The first sequence's row is #0, second's row is #3, so both should
2145+
// appear.
2146+
ASSERT_EQ(Rows.size(), 2u);
2147+
EXPECT_EQ(Rows[0], 0u);
2148+
EXPECT_EQ(Rows[1], 3u);
2149+
}
2150+
}
20382151
} // end anonymous namespace

0 commit comments

Comments
 (0)