Skip to content

Commit 94206c9

Browse files
authored
[lldb] Preparation for DWARF indexing speedup (#123732)
This is part of the work proposed in <https://discourse.llvm.org/t/rfc-speeding-up-dwarf-indexing-again/83979>. One of the changes is that there will be different structures for holding the partial indexes and the final (consolidated) index. To prepare for this, I'm making the IndexSet structure a template. The index cache encoding/decoding methods are changed into free functions, as they only need to know how to work with the final index. I've moved this functionality to a separate file, since none of it really depends on the rest of ManualDWARFIndex and it needs to be public because of its use in the unit test (both of which indicate that it could be a component of its own).
1 parent 1fd0b41 commit 94206c9

File tree

6 files changed

+240
-195
lines changed

6 files changed

+240
-195
lines changed

lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN
3030
DWARFUnit.cpp
3131
LogChannelDWARF.cpp
3232
ManualDWARFIndex.cpp
33+
ManualDWARFIndexSet.cpp
3334
NameToDIE.cpp
3435
SymbolFileDWARF.cpp
3536
SymbolFileDWARFDwo.cpp

lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp

Lines changed: 20 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,10 @@ void ManualDWARFIndex::Index() {
7777
lldb::eDescriptionLevelBrief);
7878

7979
// Include 2 passes per unit to index for extracting DIEs from the unit and
80-
// indexing the unit, and then 8 extra entries for finalizing each index set.
81-
const uint64_t total_progress = units_to_index.size() * 2 + 8;
80+
// indexing the unit, and then extra entries for finalizing each index in the
81+
// set.
82+
const auto indices = IndexSet<NameToDIE>::Indices();
83+
const uint64_t total_progress = units_to_index.size() * 2 + indices.size();
8284
Progress progress("Manually indexing DWARF", module_desc.GetData(),
8385
total_progress, /*debugger=*/nullptr,
8486
Progress::kDefaultHighFrequencyReportTime);
@@ -122,37 +124,30 @@ void ManualDWARFIndex::Index() {
122124
});
123125

124126
// Now index all DWARF unit in parallel.
125-
std::vector<IndexSet> sets(num_threads);
127+
std::vector<IndexSet<NameToDIE>> sets(num_threads);
126128
for_each_unit(
127129
[this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
128130
IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
129131
});
130132

131133
// Merge partial indexes into a single index. Process each index in a set in
132134
// parallel.
133-
auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
134-
NameToDIE &result = m_set.*index;
135-
for (auto &set : sets)
136-
result.Append(set.*index);
137-
result.Finalize();
138-
progress.Increment();
139-
};
140-
141-
task_group.async(finalize_fn, &IndexSet::function_basenames);
142-
task_group.async(finalize_fn, &IndexSet::function_fullnames);
143-
task_group.async(finalize_fn, &IndexSet::function_methods);
144-
task_group.async(finalize_fn, &IndexSet::function_selectors);
145-
task_group.async(finalize_fn, &IndexSet::objc_class_selectors);
146-
task_group.async(finalize_fn, &IndexSet::globals);
147-
task_group.async(finalize_fn, &IndexSet::types);
148-
task_group.async(finalize_fn, &IndexSet::namespaces);
135+
for (NameToDIE IndexSet<NameToDIE>::*index : indices) {
136+
task_group.async([this, &sets, index, &progress]() {
137+
NameToDIE &result = m_set.*index;
138+
for (auto &set : sets)
139+
result.Append(set.*index);
140+
result.Finalize();
141+
progress.Increment();
142+
});
143+
}
149144
task_group.wait();
150145

151146
SaveToCache();
152147
}
153148

154149
void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
155-
IndexSet &set) {
150+
IndexSet<NameToDIE> &set) {
156151
Log *log = GetLog(DWARFLog::Lookups);
157152

158153
if (log) {
@@ -210,7 +205,7 @@ void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
210205

211206
void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
212207
const LanguageType cu_language,
213-
IndexSet &set) {
208+
IndexSet<NameToDIE> &set) {
214209
for (const DWARFDebugInfoEntry &die : unit.dies()) {
215210
const dw_tag_t tag = die.Tag();
216211

@@ -555,142 +550,6 @@ void ManualDWARFIndex::Dump(Stream &s) {
555550
m_set.namespaces.Dump(&s);
556551
}
557552

558-
constexpr llvm::StringLiteral kIdentifierManualDWARFIndex("DIDX");
559-
// Define IDs for the different tables when encoding and decoding the
560-
// ManualDWARFIndex NameToDIE objects so we can avoid saving any empty maps.
561-
enum DataID {
562-
kDataIDFunctionBasenames = 1u,
563-
kDataIDFunctionFullnames,
564-
kDataIDFunctionMethods,
565-
kDataIDFunctionSelectors,
566-
kDataIDFunctionObjcClassSelectors,
567-
kDataIDGlobals,
568-
kDataIDTypes,
569-
kDataIDNamespaces,
570-
kDataIDEnd = 255u,
571-
572-
};
573-
574-
// Version 2 changes the encoding of DIERef objects used in the DWARF manual
575-
// index name tables. See DIERef class for details.
576-
constexpr uint32_t CURRENT_CACHE_VERSION = 2;
577-
578-
bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data,
579-
lldb::offset_t *offset_ptr) {
580-
StringTableReader strtab;
581-
// We now decode the string table for all strings in the data cache file.
582-
if (!strtab.Decode(data, offset_ptr))
583-
return false;
584-
585-
llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
586-
if (identifier != kIdentifierManualDWARFIndex)
587-
return false;
588-
const uint32_t version = data.GetU32(offset_ptr);
589-
if (version != CURRENT_CACHE_VERSION)
590-
return false;
591-
592-
bool done = false;
593-
while (!done) {
594-
switch (data.GetU8(offset_ptr)) {
595-
default:
596-
// If we got here, this is not expected, we expect the data IDs to match
597-
// one of the values from the DataID enumeration.
598-
return false;
599-
case kDataIDFunctionBasenames:
600-
if (!function_basenames.Decode(data, offset_ptr, strtab))
601-
return false;
602-
break;
603-
case kDataIDFunctionFullnames:
604-
if (!function_fullnames.Decode(data, offset_ptr, strtab))
605-
return false;
606-
break;
607-
case kDataIDFunctionMethods:
608-
if (!function_methods.Decode(data, offset_ptr, strtab))
609-
return false;
610-
break;
611-
case kDataIDFunctionSelectors:
612-
if (!function_selectors.Decode(data, offset_ptr, strtab))
613-
return false;
614-
break;
615-
case kDataIDFunctionObjcClassSelectors:
616-
if (!objc_class_selectors.Decode(data, offset_ptr, strtab))
617-
return false;
618-
break;
619-
case kDataIDGlobals:
620-
if (!globals.Decode(data, offset_ptr, strtab))
621-
return false;
622-
break;
623-
case kDataIDTypes:
624-
if (!types.Decode(data, offset_ptr, strtab))
625-
return false;
626-
break;
627-
case kDataIDNamespaces:
628-
if (!namespaces.Decode(data, offset_ptr, strtab))
629-
return false;
630-
break;
631-
case kDataIDEnd:
632-
// We got to the end of our NameToDIE encodings.
633-
done = true;
634-
break;
635-
}
636-
}
637-
// Success!
638-
return true;
639-
}
640-
641-
void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const {
642-
ConstStringTable strtab;
643-
644-
// Encoder the DWARF index into a separate encoder first. This allows us
645-
// gather all of the strings we willl need in "strtab" as we will need to
646-
// write the string table out before the symbol table.
647-
DataEncoder index_encoder(encoder.GetByteOrder(),
648-
encoder.GetAddressByteSize());
649-
650-
index_encoder.AppendData(kIdentifierManualDWARFIndex);
651-
// Encode the data version.
652-
index_encoder.AppendU32(CURRENT_CACHE_VERSION);
653-
654-
if (!function_basenames.IsEmpty()) {
655-
index_encoder.AppendU8(kDataIDFunctionBasenames);
656-
function_basenames.Encode(index_encoder, strtab);
657-
}
658-
if (!function_fullnames.IsEmpty()) {
659-
index_encoder.AppendU8(kDataIDFunctionFullnames);
660-
function_fullnames.Encode(index_encoder, strtab);
661-
}
662-
if (!function_methods.IsEmpty()) {
663-
index_encoder.AppendU8(kDataIDFunctionMethods);
664-
function_methods.Encode(index_encoder, strtab);
665-
}
666-
if (!function_selectors.IsEmpty()) {
667-
index_encoder.AppendU8(kDataIDFunctionSelectors);
668-
function_selectors.Encode(index_encoder, strtab);
669-
}
670-
if (!objc_class_selectors.IsEmpty()) {
671-
index_encoder.AppendU8(kDataIDFunctionObjcClassSelectors);
672-
objc_class_selectors.Encode(index_encoder, strtab);
673-
}
674-
if (!globals.IsEmpty()) {
675-
index_encoder.AppendU8(kDataIDGlobals);
676-
globals.Encode(index_encoder, strtab);
677-
}
678-
if (!types.IsEmpty()) {
679-
index_encoder.AppendU8(kDataIDTypes);
680-
types.Encode(index_encoder, strtab);
681-
}
682-
if (!namespaces.IsEmpty()) {
683-
index_encoder.AppendU8(kDataIDNamespaces);
684-
namespaces.Encode(index_encoder, strtab);
685-
}
686-
index_encoder.AppendU8(kDataIDEnd);
687-
688-
// Now that all strings have been gathered, we will emit the string table.
689-
strtab.Encode(encoder);
690-
// Followed by the symbol table data.
691-
encoder.AppendData(index_encoder.GetData());
692-
}
693-
694553
bool ManualDWARFIndex::Decode(const DataExtractor &data,
695554
lldb::offset_t *offset_ptr,
696555
bool &signature_mismatch) {
@@ -702,18 +561,18 @@ bool ManualDWARFIndex::Decode(const DataExtractor &data,
702561
signature_mismatch = true;
703562
return false;
704563
}
705-
IndexSet set;
706-
if (!set.Decode(data, offset_ptr))
564+
std::optional<IndexSet<NameToDIE>> set = DecodeIndexSet(data, offset_ptr);
565+
if (!set)
707566
return false;
708-
m_set = std::move(set);
567+
m_set = std::move(*set);
709568
return true;
710569
}
711570

712571
bool ManualDWARFIndex::Encode(DataEncoder &encoder) const {
713572
CacheSignature signature(m_dwarf->GetObjectFile());
714573
if (!signature.Encode(encoder))
715574
return false;
716-
m_set.Encode(encoder);
575+
EncodeIndexSet(m_set, encoder);
717576
return true;
718577
}
719578

lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H
1111

1212
#include "Plugins/SymbolFile/DWARF/DWARFIndex.h"
13+
#include "Plugins/SymbolFile/DWARF/ManualDWARFIndexSet.h"
1314
#include "Plugins/SymbolFile/DWARF/NameToDIE.h"
1415
#include "llvm/ADT/DenseSet.h"
1516

@@ -58,29 +59,6 @@ class ManualDWARFIndex : public DWARFIndex {
5859

5960
void Dump(Stream &s) override;
6061

61-
// Make IndexSet public so we can unit test the encoding and decoding logic.
62-
struct IndexSet {
63-
NameToDIE function_basenames;
64-
NameToDIE function_fullnames;
65-
NameToDIE function_methods;
66-
NameToDIE function_selectors;
67-
NameToDIE objc_class_selectors;
68-
NameToDIE globals;
69-
NameToDIE types;
70-
NameToDIE namespaces;
71-
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
72-
void Encode(DataEncoder &encoder) const;
73-
bool operator==(const IndexSet &rhs) const {
74-
return function_basenames == rhs.function_basenames &&
75-
function_fullnames == rhs.function_fullnames &&
76-
function_methods == rhs.function_methods &&
77-
function_selectors == rhs.function_selectors &&
78-
objc_class_selectors == rhs.objc_class_selectors &&
79-
globals == rhs.globals && types == rhs.types &&
80-
namespaces == rhs.namespaces;
81-
}
82-
};
83-
8462
private:
8563
void Index();
8664

@@ -162,11 +140,12 @@ class ManualDWARFIndex : public DWARFIndex {
162140
/// false if the symbol table wasn't cached or was out of date.
163141
bool LoadFromCache();
164142

165-
void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set);
143+
void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
144+
IndexSet<NameToDIE> &set);
166145

167146
static void IndexUnitImpl(DWARFUnit &unit,
168147
const lldb::LanguageType cu_language,
169-
IndexSet &set);
148+
IndexSet<NameToDIE> &set);
170149

171150
/// Return true if this manual DWARF index is covering only part of the DWARF.
172151
///
@@ -184,7 +163,7 @@ class ManualDWARFIndex : public DWARFIndex {
184163
llvm::DenseSet<dw_offset_t> m_units_to_avoid;
185164
llvm::DenseSet<uint64_t> m_type_sigs_to_avoid;
186165

187-
IndexSet m_set;
166+
IndexSet<NameToDIE> m_set;
188167
bool m_indexed = false;
189168
};
190169
} // namespace dwarf

0 commit comments

Comments
 (0)