Skip to content

Commit 34a8e6e

Browse files
committed
[LLDB] Enable 64 bit debug/type offset
This came out of https://discourse.llvm.org/t/dwarf-dwp-4gb-limit/63902. With big binaries we can have .dwp files where the .debug_info.dwo section can grow beyond 4GB. We would like to support this in LLVM and in LLDB. The plan is to enable manual parsing of the cu/tu index in the DWARF library (https://reviews.llvm.org/D137882), and then switch the internal index data structure to 64 bit. The second part is to enable 64-bit offset support in LLDB, which this patch does. Reviewed By: labath Differential Revision: https://reviews.llvm.org/D138618
1 parent 89b144e commit 34a8e6e

23 files changed

+211
-194
lines changed

lldb/include/lldb/Core/dwarf.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,12 @@ typedef uint64_t dw_addr_t; // Dwarf address define that must be big enough for
3030
// any addresses in the compile units that get
3131
// parsed
3232

33-
typedef uint32_t dw_offset_t; // Dwarf Debug Information Entry offset for any
33+
typedef uint64_t dw_offset_t; // Dwarf Debug Information Entry offset for any
3434
// offset into the file
3535

3636
/* Constants */
37-
#define DW_INVALID_OFFSET (~(dw_offset_t)0)
37+
#define DW_DIE_OFFSET_MAX_BITSIZE 40
38+
#define DW_INVALID_OFFSET (((uint64_t)1u << DW_DIE_OFFSET_MAX_BITSIZE) - 1)
3839
#define DW_INVALID_INDEX 0xFFFFFFFFul
3940

4041
// #define DW_ADDR_none 0x0

lldb/include/lldb/Symbol/DWARFCallFrameInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ class DWARFCallFrameInfo {
128128

129129
void GetFDEIndex();
130130

131-
bool FDEToUnwindPlan(uint32_t offset, Address startaddr,
131+
bool FDEToUnwindPlan(dw_offset_t offset, Address startaddr,
132132
UnwindPlan &unwind_plan);
133133

134134
const CIE *GetCIE(dw_offset_t cie_offset);
@@ -159,7 +159,7 @@ class DWARFCallFrameInfo {
159159
Type m_type;
160160

161161
CIESP
162-
ParseCIE(const uint32_t cie_offset);
162+
ParseCIE(const dw_offset_t cie_offset);
163163

164164
lldb::RegisterKind GetRegisterKind() const {
165165
return m_type == EH ? lldb::eRegisterKindEHFrame : lldb::eRegisterKindDWARF;

lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ void AppleDWARFIndex::GetGlobalVariables(
8080
if (!m_apple_names_up)
8181
return;
8282

83-
lldbassert(!cu.GetSymbolFileDWARF().GetDwoNum());
8483
const DWARFUnit &non_skeleton_cu = cu.GetNonSkeletonUnit();
8584
DWARFMappedHash::DIEInfoArray hash_data;
8685
m_apple_names_up->AppendAllDIEsInRange(non_skeleton_cu.GetOffset(),

lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,40 +17,22 @@ using namespace lldb_private;
1717

1818
void llvm::format_provider<DIERef>::format(const DIERef &ref, raw_ostream &OS,
1919
StringRef Style) {
20-
if (ref.dwo_num())
21-
OS << format_hex_no_prefix(*ref.dwo_num(), 8) << "/";
20+
if (ref.file_index())
21+
OS << format_hex_no_prefix(*ref.file_index(), 8) << "/";
2222
OS << (ref.section() == DIERef::DebugInfo ? "INFO" : "TYPE");
2323
OS << "/" << format_hex_no_prefix(ref.die_offset(), 8);
2424
}
2525

26-
constexpr uint32_t k_dwo_num_mask = 0x3FFFFFFF;
27-
constexpr uint32_t k_dwo_num_valid_bitmask = (1u << 30);
28-
constexpr uint32_t k_section_bitmask = (1u << 31);
29-
3026
std::optional<DIERef> DIERef::Decode(const DataExtractor &data,
3127
lldb::offset_t *offset_ptr) {
32-
const uint32_t bitfield_storage = data.GetU32(offset_ptr);
33-
uint32_t dwo_num = bitfield_storage & k_dwo_num_mask;
34-
bool dwo_num_valid = (bitfield_storage & (k_dwo_num_valid_bitmask)) != 0;
35-
Section section = (Section)((bitfield_storage & (k_section_bitmask)) != 0);
28+
DIERef die_ref(data.GetU64(offset_ptr));
29+
3630
// DIE offsets can't be zero and if we fail to decode something from data,
3731
// it will return 0
38-
dw_offset_t die_offset = data.GetU32(offset_ptr);
39-
if (die_offset == 0)
32+
if (!die_ref.die_offset())
4033
return std::nullopt;
41-
if (dwo_num_valid)
42-
return DIERef(dwo_num, section, die_offset);
43-
else
44-
return DIERef(std::nullopt, section, die_offset);
45-
}
4634

47-
void DIERef::Encode(DataEncoder &encoder) const {
48-
uint32_t bitfield_storage = m_dwo_num;
49-
if (m_dwo_num_valid)
50-
bitfield_storage |= k_dwo_num_valid_bitmask;
51-
if (m_section)
52-
bitfield_storage |= k_section_bitmask;
53-
encoder.AppendU32(bitfield_storage);
54-
static_assert(sizeof(m_die_offset) == 4, "m_die_offset must be 4 bytes");
55-
encoder.AppendU32(m_die_offset);
35+
return die_ref;
5636
}
37+
38+
void DIERef::Encode(DataEncoder &encoder) const { encoder.AppendU64(get_id()); }

lldb/source/Plugins/SymbolFile/DWARF/DIERef.h

Lines changed: 58 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,53 @@
1010
#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DIEREF_H
1111

1212
#include "lldb/Core/dwarf.h"
13-
#include "llvm/Support/FormatProviders.h"
13+
#include "lldb/Utility/LLDBAssert.h"
1414
#include <cassert>
1515
#include <optional>
16-
#include <vector>
1716

1817
/// Identifies a DWARF debug info entry within a given Module. It contains three
1918
/// "coordinates":
20-
/// - dwo_num: identifies the dwo file in the Module. If this field is not set,
21-
/// the DIERef references the main file.
19+
/// - file_index: identifies the separate stand alone debug info file
20+
/// that is referred to by the main debug info file. This will be the
21+
/// index of a DWO file for fission, or the .o file on mac when not
22+
/// using a dSYM file. If this field is not set, then this references
23+
/// a DIE inside the original object file.
2224
/// - section: identifies the section of the debug info entry in the given file:
2325
/// debug_info or debug_types.
2426
/// - die_offset: The offset of the debug info entry as an absolute offset from
2527
/// the beginning of the section specified in the section field.
2628
class DIERef {
2729
public:
2830
enum Section : uint8_t { DebugInfo, DebugTypes };
29-
30-
DIERef(std::optional<uint32_t> dwo_num, Section section,
31+
DIERef(std::optional<uint32_t> file_index, Section section,
3132
dw_offset_t die_offset)
32-
: m_dwo_num(dwo_num.value_or(0)), m_dwo_num_valid(bool(dwo_num)),
33-
m_section(section), m_die_offset(die_offset) {
34-
assert(this->dwo_num() == dwo_num && "Dwo number out of range?");
33+
: m_die_offset(die_offset), m_file_index(file_index.value_or(0)),
34+
m_file_index_valid(file_index ? true : false), m_section(section) {
35+
assert(this->file_index() == file_index && "File Index is out of range?");
36+
}
37+
38+
explicit DIERef(lldb::user_id_t uid) {
39+
m_die_offset = uid & k_die_offset_mask;
40+
m_file_index_valid = (uid & k_file_index_valid_bit) != 0;
41+
m_file_index = m_file_index_valid
42+
? (uid >> k_die_offset_bit_size) & k_file_index_mask
43+
: 0;
44+
m_section =
45+
(uid & k_section_bit) != 0 ? Section::DebugTypes : Section::DebugInfo;
46+
}
47+
48+
lldb::user_id_t get_id() const {
49+
if (m_die_offset == k_die_offset_mask)
50+
return LLDB_INVALID_UID;
51+
52+
return lldb::user_id_t(file_index().value_or(0)) << k_die_offset_bit_size |
53+
die_offset() | (m_file_index_valid ? k_file_index_valid_bit : 0) |
54+
(section() == Section::DebugTypes ? k_section_bit : 0);
3555
}
3656

37-
std::optional<uint32_t> dwo_num() const {
38-
if (m_dwo_num_valid)
39-
return m_dwo_num;
57+
std::optional<uint32_t> file_index() const {
58+
if (m_file_index_valid)
59+
return m_file_index;
4060
return std::nullopt;
4161
}
4262

@@ -45,17 +65,17 @@ class DIERef {
4565
dw_offset_t die_offset() const { return m_die_offset; }
4666

4767
bool operator<(DIERef other) const {
48-
if (m_dwo_num_valid != other.m_dwo_num_valid)
49-
return m_dwo_num_valid < other.m_dwo_num_valid;
50-
if (m_dwo_num_valid && (m_dwo_num != other.m_dwo_num))
51-
return m_dwo_num < other.m_dwo_num;
68+
if (m_file_index_valid != other.m_file_index_valid)
69+
return m_file_index_valid < other.m_file_index_valid;
70+
if (m_file_index_valid && (m_file_index != other.m_file_index))
71+
return m_file_index < other.m_file_index;
5272
if (m_section != other.m_section)
5373
return m_section < other.m_section;
5474
return m_die_offset < other.m_die_offset;
5575
}
5676

5777
bool operator==(const DIERef &rhs) const {
58-
return dwo_num() == rhs.dwo_num() && m_section == rhs.m_section &&
78+
return file_index() == rhs.file_index() && m_section == rhs.m_section &&
5979
m_die_offset == rhs.m_die_offset;
6080
}
6181

@@ -85,11 +105,28 @@ class DIERef {
85105
///
86106
void Encode(lldb_private::DataEncoder &encoder) const;
87107

108+
static constexpr uint64_t k_die_offset_bit_size = DW_DIE_OFFSET_MAX_BITSIZE;
109+
static constexpr uint64_t k_file_index_bit_size =
110+
64 - DW_DIE_OFFSET_MAX_BITSIZE - /* size of control bits */ 2;
111+
112+
static constexpr uint64_t k_file_index_valid_bit =
113+
(1ull << (k_file_index_bit_size + k_die_offset_bit_size));
114+
static constexpr uint64_t k_section_bit =
115+
(1ull << (k_file_index_bit_size + k_die_offset_bit_size + 1));
116+
static constexpr uint64_t
117+
k_file_index_mask = (~0ull) >> (64 - k_file_index_bit_size); // 0x3fffff;
118+
static constexpr uint64_t k_die_offset_mask = (~0ull) >>
119+
(64 - k_die_offset_bit_size);
120+
88121
private:
89-
uint32_t m_dwo_num : 30;
90-
uint32_t m_dwo_num_valid : 1;
91-
uint32_t m_section : 1;
92-
dw_offset_t m_die_offset;
122+
// Allow 2TB of .debug_info/.debug_types offset
123+
dw_offset_t m_die_offset : k_die_offset_bit_size;
124+
// Used for DWO index or for .o file index on mac
125+
dw_offset_t m_file_index : k_file_index_bit_size;
126+
// Set to 1 if m_file_index is a DWO number
127+
dw_offset_t m_file_index_valid : 1;
128+
// Set to 0 for .debug_info 1 for .debug_types,
129+
dw_offset_t m_section : 1;
93130
};
94131
static_assert(sizeof(DIERef) == 8);
95132

lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -731,10 +731,10 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
731731
}
732732
}
733733

734-
type_sp = dwarf->MakeType(
735-
die.GetID(), attrs.name, attrs.byte_size, nullptr,
736-
dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl,
737-
clang_type, resolve_state, TypePayloadClang(GetOwningClangModule(die)));
734+
type_sp = dwarf->MakeType(die.GetID(), attrs.name, attrs.byte_size, nullptr,
735+
attrs.type.Reference().GetID(), encoding_data_type,
736+
&attrs.decl, clang_type, resolve_state,
737+
TypePayloadClang(GetOwningClangModule(die)));
738738

739739
dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
740740
return type_sp;
@@ -834,11 +834,11 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc,
834834

835835
LinkDeclContextToDIE(TypeSystemClang::GetDeclContextForType(clang_type), die);
836836

837-
type_sp = dwarf->MakeType(die.GetID(), attrs.name, attrs.byte_size, nullptr,
838-
dwarf->GetUID(attrs.type.Reference()),
839-
Type::eEncodingIsUID, &attrs.decl, clang_type,
840-
Type::ResolveState::Forward,
841-
TypePayloadClang(GetOwningClangModule(die)));
837+
type_sp =
838+
dwarf->MakeType(die.GetID(), attrs.name, attrs.byte_size, nullptr,
839+
attrs.type.Reference().GetID(), Type::eEncodingIsUID,
840+
&attrs.decl, clang_type, Type::ResolveState::Forward,
841+
TypePayloadClang(GetOwningClangModule(die)));
842842

843843
if (TypeSystemClang::StartTagDeclarationDefinition(clang_type)) {
844844
if (die.HasChildren()) {
@@ -1336,7 +1336,7 @@ DWARFASTParserClang::ParseArrayType(const DWARFDIE &die,
13361336
ConstString empty_name;
13371337
TypeSP type_sp =
13381338
dwarf->MakeType(die.GetID(), empty_name, array_element_bit_stride / 8,
1339-
nullptr, dwarf->GetUID(type_die), Type::eEncodingIsUID,
1339+
nullptr, type_die.GetID(), Type::eEncodingIsUID,
13401340
&attrs.decl, clang_type, Type::ResolveState::Full);
13411341
type_sp->SetEncodingType(element_type);
13421342
const clang::Type *type = ClangUtil::GetQualType(clang_type).getTypePtr();

lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ std::optional<DIERef> DWARFBaseDIE::GetDIERef() const {
2323
if (!IsValid())
2424
return std::nullopt;
2525

26-
return DIERef(m_cu->GetSymbolFileDWARF().GetDwoNum(), m_cu->GetDebugSection(),
27-
m_die->GetOffset());
26+
return DIERef(m_cu->GetSymbolFileDWARF().GetFileIndex(),
27+
m_cu->GetDebugSection(), m_die->GetOffset());
2828
}
2929

3030
dw_tag_t DWARFBaseDIE::Tag() const {
@@ -70,8 +70,10 @@ uint64_t DWARFBaseDIE::GetAttributeValueAsAddress(const dw_attr_t attr,
7070
}
7171

7272
lldb::user_id_t DWARFBaseDIE::GetID() const {
73-
if (IsValid())
74-
return GetDWARF()->GetUID(*this);
73+
const std::optional<DIERef> &ref = this->GetDIERef();
74+
if (ref)
75+
return ref->get_id();
76+
7577
return LLDB_INVALID_UID;
7678
}
7779

lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ uint32_t DWARFDebugInfo::FindUnitIndex(DIERef::Section section,
136136
});
137137
uint32_t idx = std::distance(m_units.begin(), pos);
138138
if (idx == 0)
139-
return DW_INVALID_OFFSET;
139+
return DW_INVALID_INDEX;
140140
return idx - 1;
141141
}
142142

lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data,
6464
"[{0:x16}]: invalid abbreviation code {1}, "
6565
"please file a bug and "
6666
"attach the file at the start of this error message",
67-
m_offset, (unsigned)abbr_idx);
67+
(uint64_t)m_offset, (unsigned)abbr_idx);
6868
// WE can't parse anymore if the DWARF is borked...
6969
*offset_ptr = UINT32_MAX;
7070
return false;
@@ -195,7 +195,7 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data,
195195
"[{0:x16}]: Unsupported DW_FORM_{1:x}, please file a bug "
196196
"and "
197197
"attach the file at the start of this error message",
198-
m_offset, (unsigned)form);
198+
(uint64_t)m_offset, (unsigned)form);
199199
*offset_ptr = m_offset;
200200
return false;
201201
}

lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ class DWARFDebugInfoEntry {
3636
typedef collection::const_iterator const_iterator;
3737

3838
DWARFDebugInfoEntry()
39-
: m_offset(DW_INVALID_OFFSET), m_sibling_idx(0), m_has_children(false) {}
39+
: m_offset(DW_INVALID_OFFSET), m_parent_idx(0), m_sibling_idx(0),
40+
m_has_children(false) {}
4041

4142
explicit operator bool() const { return m_offset != DW_INVALID_OFFSET; }
4243
bool operator==(const DWARFDebugInfoEntry &rhs) const;
@@ -165,14 +166,16 @@ class DWARFDebugInfoEntry {
165166
static DWARFDeclContext
166167
GetDWARFDeclContextStatic(const DWARFDebugInfoEntry *die, DWARFUnit *cu);
167168

168-
dw_offset_t m_offset; // Offset within the .debug_info/.debug_types
169-
uint32_t m_parent_idx = 0; // How many to subtract from "this" to get the
170-
// parent. If zero this die has no parent
171-
uint32_t m_sibling_idx : 31, // How many to add to "this" to get the sibling.
172-
// If it is zero, then the DIE doesn't have children, or the
173-
// DWARF claimed it had children but the DIE only contained
174-
// a single NULL terminating child.
175-
m_has_children : 1;
169+
// Up to 2TB offset within the .debug_info/.debug_types
170+
dw_offset_t m_offset : DW_DIE_OFFSET_MAX_BITSIZE;
171+
// How many to subtract from "this" to get the parent. If zero this die has no
172+
// parent
173+
dw_offset_t m_parent_idx : 64 - DW_DIE_OFFSET_MAX_BITSIZE;
174+
// How many to add to "this" to get the sibling.
175+
// If it is zero, then the DIE doesn't have children,
176+
// or the DWARF claimed it had children but the DIE
177+
// only contained a single NULL terminating child.
178+
uint32_t m_sibling_idx : 31, m_has_children : 1;
176179
uint16_t m_abbr_idx = 0;
177180
/// A copy of the DW_TAG value so we don't have to go through the compile
178181
/// unit abbrev table

lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
341341
if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
342342
if (const auto *contribution =
343343
entry->getContribution(llvm::DW_SECT_STR_OFFSETS))
344-
baseOffset = contribution->getOffset32();
344+
baseOffset = contribution->getOffset();
345345
else
346346
return;
347347
}
@@ -489,7 +489,7 @@ void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) {
489489
*GetDWOId());
490490
return;
491491
}
492-
offset += contribution->getOffset32();
492+
offset += contribution->getOffset();
493493
}
494494
m_loclists_base = loclists_base;
495495

@@ -527,7 +527,7 @@ DWARFDataExtractor DWARFUnit::GetLocationData() const {
527527
if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
528528
if (const auto *contribution = entry->getContribution(
529529
GetVersion() >= 5 ? llvm::DW_SECT_LOCLISTS : llvm::DW_SECT_EXT_LOC))
530-
return DWARFDataExtractor(data, contribution->getOffset32(),
530+
return DWARFDataExtractor(data, contribution->getOffset(),
531531
contribution->getLength32());
532532
return DWARFDataExtractor();
533533
}
@@ -540,7 +540,7 @@ DWARFDataExtractor DWARFUnit::GetRnglistData() const {
540540
if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
541541
if (const auto *contribution =
542542
entry->getContribution(llvm::DW_SECT_RNGLISTS))
543-
return DWARFDataExtractor(data, contribution->getOffset32(),
543+
return DWARFDataExtractor(data, contribution->getOffset(),
544544
contribution->getLength32());
545545
GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
546546
"Failed to find range list contribution for CU with signature {0:x16}",
@@ -935,7 +935,7 @@ DWARFUnitHeader::extract(const DWARFDataExtractor &data,
935935
llvm::inconvertibleErrorCode(),
936936
"DWARF package index missing abbreviation column");
937937
}
938-
header.m_abbr_offset = abbr_entry->getOffset32();
938+
header.m_abbr_offset = abbr_entry->getOffset();
939939
}
940940

941941
bool length_OK = data.ValidOffset(header.GetNextUnitOffset() - 1);

0 commit comments

Comments
 (0)