Skip to content

Commit fb14129

Browse files
committed
[ELF] --gdb-index: skip SHF_GROUP .debug_info
-gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections. All except one are type units (.debug_types before DWARF v5). When constructing .gdb_index, we should ignore these type units. We use a simple heuristic: the compile unit does not have the SHF_GROUP flag. (This needs to be revisited if people place compile unit .debug_info in COMDAT groups.) This issue manifests as a data race: because an object file may have multiple .debug_info sections, we may concurrently construct `LLDDwarfObj` for the same file in multiple threads. The threads may access `InputSectionBase::data()` concurrently on the same input section. `InputSectionBase::data()` does a lazy uncompress() and rewrites the member variable `rawData`. A thread running zlib `inflate()` (transitively called by uncompress()) on a buffer with `rawData` tampered with by another thread may fail with `uncompress failed: zlib error: Z_DATA_ERROR`. Even if no data race occurred in an optimistic run, if there are N .debug_info sections, one CU entry and its address ranges will be replicated N times. The resulting .gdb_index can be much larger than a correct one. The new test gdb-index-dwarf5-type-unit.s actually has two compile units. This cannot be produced with regular approaches (it can be produced with -r --unique). This is used to demonstrate that the .gdb_index construction code only considers the last non-SHF_GROUP .debug_info. Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D85579
1 parent 19d7cc2 commit fb14129

File tree

4 files changed

+142
-26
lines changed

4 files changed

+142
-26
lines changed

lld/ELF/DWARF.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ using namespace lld;
2626
using namespace lld::elf;
2727

2828
template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
29-
for (InputSectionBase *sec : obj->getSections()) {
29+
// Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below.
30+
ArrayRef<typename ELFT::Shdr> objSections =
31+
CHECK(obj->getObj().sections(), obj);
32+
assert(objSections.size() == obj->getSections().size());
33+
for (auto it : llvm::enumerate(obj->getSections())) {
34+
InputSectionBase *sec = it.value();
3035
if (!sec)
3136
continue;
3237

@@ -35,7 +40,6 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
3540
.Case(".debug_addr", &addrSection)
3641
.Case(".debug_gnu_pubnames", &gnuPubnamesSection)
3742
.Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
38-
.Case(".debug_info", &infoSection)
3943
.Case(".debug_loclists", &loclistsSection)
4044
.Case(".debug_ranges", &rangesSection)
4145
.Case(".debug_rnglists", &rnglistsSection)
@@ -53,6 +57,20 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
5357
strSection = toStringRef(sec->data());
5458
else if (sec->name == ".debug_line_str")
5559
lineStrSection = toStringRef(sec->data());
60+
else if (sec->name == ".debug_info" &&
61+
!(objSections[it.index()].sh_flags & ELF::SHF_GROUP)) {
62+
// In DWARF v5, -fdebug-types-section places type units in .debug_info
63+
// sections in COMDAT groups. They are not compile units and thus should
64+
// be ignored for .gdb_index/diagnostics purposes.
65+
//
66+
// We use a simple heuristic: the compile unit does not have the SHF_GROUP
67+
// flag. If we place compile units in COMDAT groups in the future, we may
68+
// need to perform a lightweight parsing. We drop the SHF_GROUP flag when
69+
// the InputSection was created, so we need to retrieve sh_flags from the
70+
// associated ELF section header.
71+
infoSection.Data = toStringRef(sec->data());
72+
infoSection.sec = sec;
73+
}
5674
}
5775
}
5876

lld/ELF/DWARF.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
3232
f(infoSection);
3333
}
3434

35+
InputSection *getInfoSection() const {
36+
return cast<InputSection>(infoSection.sec);
37+
}
38+
3539
const llvm::DWARFSection &getLoclistsSection() const override {
3640
return loclistsSection;
3741
}

lld/ELF/SyntheticSections.cpp

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2695,15 +2695,6 @@ void GdbIndexSection::initOutputSize() {
26952695
}
26962696
}
26972697

2698-
static std::vector<InputSection *> getDebugInfoSections() {
2699-
std::vector<InputSection *> ret;
2700-
for (InputSectionBase *s : inputSections)
2701-
if (InputSection *isec = dyn_cast<InputSection>(s))
2702-
if (isec->name == ".debug_info")
2703-
ret.push_back(isec);
2704-
return ret;
2705-
}
2706-
27072698
static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &dwarf) {
27082699
std::vector<GdbIndexSection::CuEntry> ret;
27092700
for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units())
@@ -2857,30 +2848,40 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,
28572848

28582849
// Returns a newly-created .gdb_index section.
28592850
template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
2860-
std::vector<InputSection *> sections = getDebugInfoSections();
2861-
2862-
// .debug_gnu_pub{names,types} are useless in executables.
2863-
// They are present in input object files solely for creating
2864-
// a .gdb_index. So we can remove them from the output.
2865-
for (InputSectionBase *s : inputSections)
2851+
// Collect InputFiles with .debug_info. See the comment in
2852+
// LLDDwarfObj<ELFT>::LLDDwarfObj. If we do lightweight parsing in the future,
2853+
// note that isec->data() may uncompress the full content, which should be
2854+
// parallelized.
2855+
SetVector<InputFile *> files;
2856+
for (InputSectionBase *s : inputSections) {
2857+
InputSection *isec = dyn_cast<InputSection>(s);
2858+
if (!isec)
2859+
continue;
2860+
// .debug_gnu_pub{names,types} are useless in executables.
2861+
// They are present in input object files solely for creating
2862+
// a .gdb_index. So we can remove them from the output.
28662863
if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes")
28672864
s->markDead();
2865+
else if (isec->name == ".debug_info")
2866+
files.insert(isec->file);
2867+
}
28682868

2869-
std::vector<GdbChunk> chunks(sections.size());
2870-
std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size());
2869+
std::vector<GdbChunk> chunks(files.size());
2870+
std::vector<std::vector<NameAttrEntry>> nameAttrs(files.size());
28712871

2872-
parallelForEachN(0, sections.size(), [&](size_t i) {
2872+
parallelForEachN(0, files.size(), [&](size_t i) {
28732873
// To keep memory usage low, we don't want to keep cached DWARFContext, so
28742874
// avoid getDwarf() here.
2875-
ObjFile<ELFT> *file = sections[i]->getFile<ELFT>();
2875+
ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]);
28762876
DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file));
2877+
auto &dobj = static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj());
28772878

2878-
chunks[i].sec = sections[i];
2879+
// If there are multiple compile units .debug_info (very rare ld -r --unique),
2880+
// this only picks the last one. Other address ranges are lost.
2881+
chunks[i].sec = dobj.getInfoSection();
28792882
chunks[i].compilationUnits = readCuList(dwarf);
2880-
chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]);
2881-
nameAttrs[i] = readPubNamesAndTypes<ELFT>(
2882-
static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj()),
2883-
chunks[i].compilationUnits);
2883+
chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec);
2884+
nameAttrs[i] = readPubNamesAndTypes<ELFT>(dobj, chunks[i].compilationUnits);
28842885
});
28852886

28862887
auto *ret = make<GdbIndexSection>();
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# REQUIRES: x86, zlib
2+
## -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections.
3+
## All except one are type units. Test we can locate the compile unit, add it to
4+
## the index, and not erroneously duplicate it (which would happen if we
5+
## consider every .debug_info a compile unit).
6+
7+
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
8+
# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t
9+
# RUN: llvm-dwarfdump --gdb-index %t | FileCheck %s
10+
11+
## Test we don't uncompress a section while another thread is concurrently
12+
## accessing it. This would be detected by tsan as a data race.
13+
# RUN: llvm-objcopy --compress-debug-sections %t.o
14+
# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t1
15+
# RUN: llvm-dwarfdump --gdb-index %t1 | FileCheck %s
16+
17+
## In this test, there are actually two compile unit .debug_info (very uncommon;
18+
## -r --unique). Currently we only handle the last compile unit.
19+
# CHECK: CU list offset = 0x18, has 1 entries:
20+
# CHECK-NEXT: 0: Offset = 0x32, Length = 0x19
21+
22+
# CHECK: Address area offset = 0x28, has 1 entries:
23+
# CHECK-NEXT: Low/High address = [0x1001, 0x1002) (Size: 0x1), CU id = 0
24+
25+
.Lfunc_begin0:
26+
ret
27+
.Lfunc_end0:
28+
.Lfunc_begin1:
29+
ret
30+
.Lfunc_end1:
31+
32+
.section .debug_abbrev,"",@progbits
33+
.byte 1 # Abbreviation Code
34+
.byte 65 # DW_TAG_type_unit
35+
.byte 0 # DW_CHILDREN_no
36+
.byte 0 # EOM(1)
37+
.byte 0 # EOM(2)
38+
39+
.byte 2 # Abbreviation Code
40+
.byte 17 # DW_TAG_compile_unit
41+
.byte 0 # DW_CHILDREN_no
42+
.byte 17 # DW_AT_low_pc
43+
.byte 1 # DW_FORM_addr
44+
.byte 18 # DW_AT_high_pc
45+
.byte 6 # DW_FORM_data4
46+
.byte 0 # EOM(1)
47+
.byte 0 # EOM(2)
48+
49+
.byte 0 # EOM(3)
50+
51+
.macro TYPE_UNIT id signature
52+
.section .debug_info,"G",@progbits,\signature
53+
.long .Ldebug_info_end\id-.Ldebug_info_start\id # Length of Unit
54+
.Ldebug_info_start\id:
55+
.short 5 # DWARF version number
56+
.byte 2 # DWARF Unit Type
57+
.byte 8 # Address Size
58+
.long .debug_abbrev # Offset Into Abbrev. Section
59+
.quad \signature # Type Signature
60+
.long .Ldebug_info_end\id # Type DIE Offset
61+
.byte 1 # Abbrev [1] DW_TAG_type_unit
62+
.Ldebug_info_end\id:
63+
.endm
64+
65+
## We place compile units between two type units (rare). A naive approach will
66+
## take either the first or the last .debug_info
67+
TYPE_UNIT 0, 123
68+
69+
.section .debug_info,"",@progbits,unique,0
70+
.Lcu_begin0:
71+
.long .Lcu_end0-.Lcu_begin0-4 # Length of Unit
72+
.short 5 # DWARF version number
73+
.byte 1 # DWARF Unit Type
74+
.byte 8 # Address Size
75+
.long .debug_abbrev # Offset Into Abbrev. Section
76+
.byte 2 # Abbrev [2] DW_TAG_compile_unit
77+
.quad .Lfunc_begin0 # DW_AT_low_pc
78+
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
79+
.Lcu_end0:
80+
81+
.section .debug_info,"",@progbits,unique,1
82+
.Lcu_begin1:
83+
.long .Lcu_end1-.Lcu_begin1-4 # Length of Unit
84+
.short 5 # DWARF version number
85+
.byte 1 # DWARF Unit Type
86+
.byte 8 # Address Size
87+
.long .debug_abbrev # Offset Into Abbrev. Section
88+
.byte 2 # Abbrev [2] DW_TAG_compile_unit
89+
.quad .Lfunc_begin1 # DW_AT_low_pc
90+
.long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
91+
.Lcu_end1:
92+
93+
TYPE_UNIT 1, 456

0 commit comments

Comments
 (0)