Skip to content

[lldb] Preparation for DWARF indexing speedup #123732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN
DWARFUnit.cpp
LogChannelDWARF.cpp
ManualDWARFIndex.cpp
ManualDWARFIndexSet.cpp
NameToDIE.cpp
SymbolFileDWARF.cpp
SymbolFileDWARFDwo.cpp
Expand Down
181 changes: 20 additions & 161 deletions lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ void ManualDWARFIndex::Index() {
lldb::eDescriptionLevelBrief);

// Include 2 passes per unit to index for extracting DIEs from the unit and
// indexing the unit, and then 8 extra entries for finalizing each index set.
const uint64_t total_progress = units_to_index.size() * 2 + 8;
// indexing the unit, and then extra entries for finalizing each index in the
// set.
const auto indices = IndexSet<NameToDIE>::Indices();
const uint64_t total_progress = units_to_index.size() * 2 + indices.size();
Progress progress("Manually indexing DWARF", module_desc.GetData(),
total_progress, /*debugger=*/nullptr,
Progress::kDefaultHighFrequencyReportTime);
Expand Down Expand Up @@ -122,37 +124,30 @@ void ManualDWARFIndex::Index() {
});

// Now index all DWARF units in parallel.
std::vector<IndexSet> sets(num_threads);
std::vector<IndexSet<NameToDIE>> sets(num_threads);
for_each_unit(
[this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
});

// Merge partial indexes into a single index. Process each index in a set in
// parallel.
auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
NameToDIE &result = m_set.*index;
for (auto &set : sets)
result.Append(set.*index);
result.Finalize();
progress.Increment();
};

task_group.async(finalize_fn, &IndexSet::function_basenames);
task_group.async(finalize_fn, &IndexSet::function_fullnames);
task_group.async(finalize_fn, &IndexSet::function_methods);
task_group.async(finalize_fn, &IndexSet::function_selectors);
task_group.async(finalize_fn, &IndexSet::objc_class_selectors);
task_group.async(finalize_fn, &IndexSet::globals);
task_group.async(finalize_fn, &IndexSet::types);
task_group.async(finalize_fn, &IndexSet::namespaces);
for (NameToDIE IndexSet<NameToDIE>::*index : indices) {
task_group.async([this, &sets, index, &progress]() {
NameToDIE &result = m_set.*index;
for (auto &set : sets)
result.Append(set.*index);
result.Finalize();
progress.Increment();
});
}
task_group.wait();

SaveToCache();
}

void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
IndexSet &set) {
IndexSet<NameToDIE> &set) {
Log *log = GetLog(DWARFLog::Lookups);

if (log) {
Expand Down Expand Up @@ -210,7 +205,7 @@ void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,

void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
const LanguageType cu_language,
IndexSet &set) {
IndexSet<NameToDIE> &set) {
for (const DWARFDebugInfoEntry &die : unit.dies()) {
const dw_tag_t tag = die.Tag();

Expand Down Expand Up @@ -555,142 +550,6 @@ void ManualDWARFIndex::Dump(Stream &s) {
m_set.namespaces.Dump(&s);
}

constexpr llvm::StringLiteral kIdentifierManualDWARFIndex("DIDX");
// Define IDs for the different tables when encoding and decoding the
// ManualDWARFIndex NameToDIE objects so we can avoid saving any empty maps.
enum DataID {
  // One tag per NameToDIE table. Each value is written as a single byte in
  // front of the table it identifies, so the values are spelled out explicitly.
  kDataIDFunctionBasenames = 1u,
  kDataIDFunctionFullnames = 2u,
  kDataIDFunctionMethods = 3u,
  kDataIDFunctionSelectors = 4u,
  kDataIDFunctionObjcClassSelectors = 5u,
  kDataIDGlobals = 6u,
  kDataIDTypes = 7u,
  kDataIDNamespaces = 8u,
  // Terminator: marks the end of the sequence of encoded tables.
  kDataIDEnd = 255u,
};

// Version 2 changes the encoding of DIERef objects used in the DWARF manual
// index name tables. See DIERef class for details.
constexpr uint32_t CURRENT_CACHE_VERSION = 2;

bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data,
lldb::offset_t *offset_ptr) {
StringTableReader strtab;
// We now decode the string table for all strings in the data cache file.
if (!strtab.Decode(data, offset_ptr))
return false;

llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
if (identifier != kIdentifierManualDWARFIndex)
return false;
const uint32_t version = data.GetU32(offset_ptr);
if (version != CURRENT_CACHE_VERSION)
return false;

bool done = false;
while (!done) {
switch (data.GetU8(offset_ptr)) {
default:
// If we got here, this is not expected, we expect the data IDs to match
// one of the values from the DataID enumeration.
return false;
case kDataIDFunctionBasenames:
if (!function_basenames.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDFunctionFullnames:
if (!function_fullnames.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDFunctionMethods:
if (!function_methods.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDFunctionSelectors:
if (!function_selectors.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDFunctionObjcClassSelectors:
if (!objc_class_selectors.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDGlobals:
if (!globals.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDTypes:
if (!types.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDNamespaces:
if (!namespaces.Decode(data, offset_ptr, strtab))
return false;
break;
case kDataIDEnd:
// We got to the end of our NameToDIE encodings.
done = true;
break;
}
}
// Success!
return true;
}

/// Encode this index set into \a encoder.
///
/// The output is the string table followed by the index data: the index
/// identifier, CURRENT_CACHE_VERSION, one tagged entry for each non-empty
/// NameToDIE table, and a closing kDataIDEnd tag.
///
/// \param[in,out] encoder
///     The encoder that receives the string table and the index data.
void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const {
  ConstStringTable strtab;

  // Encode the DWARF index into a separate encoder first. This allows us to
  // gather all of the strings we will need in "strtab", as the string table
  // has to be written out before the index data that refers to it.
  DataEncoder index_encoder(encoder.GetByteOrder(),
                            encoder.GetAddressByteSize());

  // Header: identifier followed by the data version.
  index_encoder.AppendData(kIdentifierManualDWARFIndex);
  index_encoder.AppendU32(CURRENT_CACHE_VERSION);

  // Emits "<tag><table>" for a table, skipping empty tables entirely so they
  // take no space in the cache.
  auto append_table = [&index_encoder, &strtab](uint8_t tag,
                                                const auto &table) {
    if (table.IsEmpty())
      return;
    index_encoder.AppendU8(tag);
    table.Encode(index_encoder, strtab);
  };

  append_table(kDataIDFunctionBasenames, function_basenames);
  append_table(kDataIDFunctionFullnames, function_fullnames);
  append_table(kDataIDFunctionMethods, function_methods);
  append_table(kDataIDFunctionSelectors, function_selectors);
  append_table(kDataIDFunctionObjcClassSelectors, objc_class_selectors);
  append_table(kDataIDGlobals, globals);
  append_table(kDataIDTypes, types);
  append_table(kDataIDNamespaces, namespaces);
  index_encoder.AppendU8(kDataIDEnd);

  // Now that all strings have been gathered, emit the string table...
  strtab.Encode(encoder);
  // ...followed by the index data built above.
  encoder.AppendData(index_encoder.GetData());
}

bool ManualDWARFIndex::Decode(const DataExtractor &data,
lldb::offset_t *offset_ptr,
bool &signature_mismatch) {
Expand All @@ -702,18 +561,18 @@ bool ManualDWARFIndex::Decode(const DataExtractor &data,
signature_mismatch = true;
return false;
}
IndexSet set;
if (!set.Decode(data, offset_ptr))
std::optional<IndexSet<NameToDIE>> set = DecodeIndexSet(data, offset_ptr);
if (!set)
return false;
m_set = std::move(set);
m_set = std::move(*set);
return true;
}

bool ManualDWARFIndex::Encode(DataEncoder &encoder) const {
CacheSignature signature(m_dwarf->GetObjectFile());
if (!signature.Encode(encoder))
return false;
m_set.Encode(encoder);
EncodeIndexSet(m_set, encoder);
return true;
}

Expand Down
31 changes: 5 additions & 26 deletions lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_MANUALDWARFINDEX_H

#include "Plugins/SymbolFile/DWARF/DWARFIndex.h"
#include "Plugins/SymbolFile/DWARF/ManualDWARFIndexSet.h"
#include "Plugins/SymbolFile/DWARF/NameToDIE.h"
#include "llvm/ADT/DenseSet.h"

Expand Down Expand Up @@ -58,29 +59,6 @@ class ManualDWARFIndex : public DWARFIndex {

void Dump(Stream &s) override;

// Make IndexSet public so we can unit test the encoding and decoding logic.
struct IndexSet {
  // One name table per kind of lookup.
  NameToDIE function_basenames;
  NameToDIE function_fullnames;
  NameToDIE function_methods;
  NameToDIE function_selectors;
  NameToDIE objc_class_selectors;
  NameToDIE globals;
  NameToDIE types;
  NameToDIE namespaces;

  // Serialization of the whole set to and from the index cache.
  bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
  void Encode(DataEncoder &encoder) const;

  // Two sets are equal when every table compares equal to its counterpart.
  // Tables are compared in declaration order, stopping at the first mismatch.
  bool operator==(const IndexSet &rhs) const {
    if (!(function_basenames == rhs.function_basenames))
      return false;
    if (!(function_fullnames == rhs.function_fullnames))
      return false;
    if (!(function_methods == rhs.function_methods))
      return false;
    if (!(function_selectors == rhs.function_selectors))
      return false;
    if (!(objc_class_selectors == rhs.objc_class_selectors))
      return false;
    if (!(globals == rhs.globals))
      return false;
    if (!(types == rhs.types))
      return false;
    return namespaces == rhs.namespaces;
  }
};

private:
void Index();

Expand Down Expand Up @@ -162,11 +140,12 @@ class ManualDWARFIndex : public DWARFIndex {
/// false if the symbol table wasn't cached or was out of date.
bool LoadFromCache();

void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set);
void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
IndexSet<NameToDIE> &set);

static void IndexUnitImpl(DWARFUnit &unit,
const lldb::LanguageType cu_language,
IndexSet &set);
IndexSet<NameToDIE> &set);

/// Return true if this manual DWARF index is covering only part of the DWARF.
///
Expand All @@ -184,7 +163,7 @@ class ManualDWARFIndex : public DWARFIndex {
llvm::DenseSet<dw_offset_t> m_units_to_avoid;
llvm::DenseSet<uint64_t> m_type_sigs_to_avoid;

IndexSet m_set;
IndexSet<NameToDIE> m_set;
bool m_indexed = false;
};
} // namespace dwarf
Expand Down
Loading
Loading