Skip to content

Commit 4bdb9ca

Browse files
committed
[lldb] Use on disk hash table as the binary format of SwiftMetadataCache
Replace the previous simple, compressed list of (string, offset) pairs with an OnDiskChainedHashTable. This format can be mmapped and is faster to load.
1 parent 59f1187 commit 4bdb9ca

File tree

3 files changed

+210
-128
lines changed

3 files changed

+210
-128
lines changed

lldb/source/Core/CoreProperties.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ let Definition = "modulelist" in {
3232
Desc<"The module loading mode to use when loading modules for Swift.">;
3333
def EnableSwiftMetadataCache: Property<"enable-swift-metadata-cache", "Boolean">,
3434
Global,
35-
DefaultFalse,
35+
DefaultTrue,
3636
Desc<"Enable caching for Swift reflection metadata in LLDB.">;
3737
def SwiftMetadataCachePath: Property<"swift-metadata-cache-path", "FileSpec">,
3838
Global,

lldb/source/Plugins/LanguageRuntime/Swift/SwiftMetadataCache.cpp

Lines changed: 95 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,9 @@
22

33
#include "lldb/Utility/DataEncoder.h"
44
#include "lldb/Utility/LLDBLog.h"
5-
#include "lldb/Utility/Log.h"
65
#include "lldb/Version/Version.h"
7-
#include "llvm/CodeGen/AccelTable.h"
8-
#include "llvm/CodeGen/AsmPrinter.h"
96
#include "llvm/Support/BLAKE3.h"
107
#include "llvm/Support/CachePruning.h"
11-
#include "llvm/Support/Compression.h"
128

139
using namespace lldb;
1410
using namespace lldb_private;
@@ -32,9 +28,7 @@ SwiftMetadataCache::SwiftMetadataCache() {
3228
}
3329
}
3430

35-
bool SwiftMetadataCache::is_enabled() {
36-
return llvm::zlib::isAvailable() && m_data_file_cache.hasValue();
37-
}
31+
bool SwiftMetadataCache::is_enabled() { return m_data_file_cache.hasValue(); }
3832

3933
void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
4034
uint64_t info_id) {
@@ -44,7 +38,8 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
4438
if (!is_enabled())
4539
return;
4640

47-
m_info_to_module[info_id] = {module, false};
41+
/// Insert the module cache info as not processed.
42+
m_reflection_info_to_module.insert({info_id, module});
4843

4944
// Attempt to load the cached file.
5045
auto module_name = getTyperefCacheFileNameForModule(module);
@@ -57,17 +52,32 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
5752
return;
5853
}
5954

55+
// Move it to the instance variable so references to this data don't go
56+
// out of scope.
57+
m_hash_table_buffers.emplace_back(std::move(mem_buffer_up));
58+
auto &mem_buffer = m_hash_table_buffers.back();
59+
6060
// Extractor used to extract the header information (see the .h file for
6161
// details on the format).
62-
DataExtractor header_extractor(mem_buffer_up->getBufferStart(),
63-
mem_buffer_up->getBufferSize(),
62+
DataExtractor header_extractor(mem_buffer->getBufferStart(),
63+
mem_buffer->getBufferSize(),
6464
module->GetObjectFile()->GetByteOrder(),
6565
module->GetObjectFile()->GetAddressByteSize());
6666

6767
lldb::offset_t read_offset = 0;
68+
uint16_t cached_UUID_size = 0;
69+
if (!header_extractor.GetU16(&read_offset, &cached_UUID_size, 1)) {
70+
LLDB_LOG(log,
71+
"[SwiftMetadataCache] Failed to read cached UUID size for module {0}.",
72+
module->GetFileSpec().GetFilename());
73+
m_data_file_cache->RemoveCacheFile(module_name);
74+
return;
75+
}
6876

69-
std::string UUID = module->GetUUID().GetAsString();
70-
std::string cached_UUID = header_extractor.GetCStr(&read_offset);
77+
const auto *cached_UUID_data = reinterpret_cast<const uint8_t *>(
78+
header_extractor.GetData(&read_offset, cached_UUID_size));
79+
80+
llvm::ArrayRef<uint8_t> cached_UUID(cached_UUID_data, cached_UUID_size);
7181
// If no uuid in the file something is wrong with the cache.
7282
if (cached_UUID.empty()) {
7383
LLDB_LOG(log,
@@ -77,6 +87,7 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
7787
return;
7888
}
7989

90+
auto UUID = module->GetUUID().GetBytes();
8091
// If the UUIDs don't match this is most likely a stale cache.
8192
if (cached_UUID != UUID) {
8293
LLDB_LOGV(log, "[SwiftMetadataCache] Module UUID mismatch for {0}.",
@@ -85,73 +96,31 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
8596
return;
8697
}
8798

88-
uint64_t expanded_size = 0;
89-
if (!header_extractor.GetU64(&read_offset, &expanded_size, 1)) {
99+
// The on disk hash table must have a 4-byte alignment, skip
100+
// the padding when reading.
101+
read_offset = llvm::alignTo(read_offset, 4);
102+
103+
// The offset of the hash table control structure, which follows the payload.
104+
uint32_t table_control_offset = 0;
105+
if (!header_extractor.GetU32(&read_offset, &table_control_offset, 1)) {
90106
LLDB_LOGV(log,
91-
"[SwiftMetadataCache] Failed to read decompressed cache size for "
107+
"[SwiftMetadataCache] Failed to read table offset for "
92108
"module {0}.",
93109
module->GetFileSpec().GetFilename());
94110
m_data_file_cache->RemoveCacheFile(module_name);
95111
return;
96112
}
97113

98-
const auto *start = (const char *)header_extractor.GetData(&read_offset, 0);
99-
// Create a reference to the compressed data.
100-
llvm::StringRef string_buffer(start, (uint64_t)mem_buffer_up->getBufferEnd() -
101-
(uint64_t)start);
102-
103-
llvm::SmallString<0> decompressed;
104-
auto error =
105-
llvm::zlib::uncompress(string_buffer, decompressed, expanded_size);
106-
if (error) {
107-
auto error_string = llvm::toString(std::move(error));
108-
LLDB_LOG(log,
109-
"[SwiftMetadataCache] Cache decompression failed with error: {0}. "
110-
"Deleting cached file.",
111-
error_string);
112-
m_data_file_cache->RemoveCacheFile(module_name);
113-
return;
114-
}
114+
const auto *table_contents = reinterpret_cast<const uint8_t *>(
115+
header_extractor.GetData(&read_offset, 0));
115116

116-
// Extractor to extract the body of the cached file (see SwiftMetadataCache.h
117-
// for more details of the format).
118-
DataExtractor body_extractor(decompressed.data(), decompressed.size(),
119-
module->GetObjectFile()->GetByteOrder(),
120-
module->GetObjectFile()->GetAddressByteSize());
121-
read_offset = 0;
122-
auto num_entries = body_extractor.GetU64(&read_offset);
123-
124-
// Map to extract the encoded data to. Since extraction can fail we don't want
125-
// to insert values into the final map in case we have to abort midway.
126-
llvm::StringMap<swift::remote::FieldDescriptorLocator> temp_map;
127-
for (size_t i = 0; i < num_entries; i++) {
128-
const auto *mangled_name = body_extractor.GetCStr(&read_offset);
129-
if (!mangled_name) {
130-
LLDB_LOG(log,
131-
"[SwiftMetadataCache] Failed to read mangled name {0} at offset "
132-
"{1} for module {2}.",
133-
i, read_offset, module->GetFileSpec().GetFilename());
134-
m_data_file_cache->RemoveCacheFile(module_name);
135-
return;
136-
}
137-
uint64_t offset = 0;
138-
if (!body_extractor.GetU64(&read_offset, &offset, 1)) {
139-
LLDB_LOG(log,
140-
"[SwiftMetadataCache] Failed to read mangled name {0} at offset "
141-
"{1} for module {2}.",
142-
i, read_offset, module->GetFileSpec().GetFilename());
143-
m_data_file_cache->RemoveCacheFile(module_name);
144-
return;
145-
}
146-
temp_map[mangled_name] = {info_id, offset};
147-
}
117+
const auto *table_control = table_contents + table_control_offset;
148118

149-
// Move the values to the actual map now that we know that it's safe.
150-
for (auto &p : temp_map)
151-
m_mangled_name_to_offset.try_emplace(p.getKey(), p.second);
119+
// Store the hash table.
120+
m_reflection_info_to_module.find(info_id)->second.cache_hash_table.reset(
121+
llvm::OnDiskChainedHashTable<TypeRefInfo>::Create(
122+
table_control, table_contents, m_info));
152123

153-
// Mark this reflection info as processed.
154-
m_info_to_module[info_id] = {module, true};
155124
LLDB_LOGV(log, "[SwiftMetadataCache] Loaded cache for module {0}.",
156125
module->GetFileSpec().GetFilename());
157126
}
@@ -167,33 +136,38 @@ static bool areMangledNamesAndFieldSectionSameSize(
167136
return field_descriptors_size == mangled_names.size();
168137
}
169138

170-
bool SwiftMetadataCache::writeMangledNamesAndOffsetsToEncoder(
139+
llvm::Optional<std::pair<uint32_t, llvm::SmallString<32>>>
140+
SwiftMetadataCache::generateHashTableBlob(
171141
uint64_t info_id, const swift::reflection::FieldSection &field_descriptors,
172-
const std::vector<std::string> &mangled_names, DataEncoder &encoder) {
142+
const std::vector<std::string> &mangled_names) {
173143
Log *log = GetLog(LLDBLog::Types);
174-
auto num_entries = mangled_names.size();
175-
encoder.AppendU64(num_entries);
144+
llvm::SmallString<32> hash_table_blob;
145+
llvm::raw_svector_ostream blobStream(hash_table_blob);
176146

177147
// If the amount of mangled names and field descriptors don't match something
178148
// unexpected happened.
179149
if (!areMangledNamesAndFieldSectionSameSize(field_descriptors,
180150
mangled_names)) {
181151
LLDB_LOG(log, "[SwiftMetadataCache] Mismatch between number of mangled "
182152
"names and field descriptors passed in.");
183-
return false;
153+
return {};
184154
}
185155

156+
llvm::OnDiskChainedHashTableGenerator<TypeRefInfo> table_generator;
186157
for (auto pair : llvm::zip(field_descriptors, mangled_names)) {
187158
auto field_descriptor = std::get<0>(pair);
188159
auto &mangled_name = std::get<1>(pair);
189160
if (mangled_name.empty())
190161
continue;
191162
auto offset = field_descriptor.getAddressData() -
192163
field_descriptors.startAddress().getAddressData();
193-
encoder.AppendCString(mangled_name.data());
194-
encoder.AppendU64(offset);
164+
table_generator.insert(mangled_name, offset, m_info);
195165
}
196-
return true;
166+
167+
// Make sure that no bucket is at offset 0.
168+
llvm::support::endian::write<uint32_t>(blobStream, 0, llvm::support::little);
169+
uint32_t table_control_offset = table_generator.Emit(blobStream, m_info);
170+
return {{std::move(table_control_offset), std::move(hash_table_blob)}};
197171
}
198172

199173
void SwiftMetadataCache::cacheFieldDescriptors(
@@ -205,33 +179,40 @@ void SwiftMetadataCache::cacheFieldDescriptors(
205179
if (!is_enabled())
206180
return;
207181

208-
auto it = m_info_to_module.find(info_id);
209-
if (it == m_info_to_module.end()) {
182+
auto it = m_reflection_info_to_module.find(info_id);
183+
if (it == m_reflection_info_to_module.end()) {
210184
LLDB_LOGV(log, "[SwiftMetadataCache] No module found with module id {0}.",
211185
info_id);
212186
return;
213187
}
214188

215-
auto module = std::get<ModuleSP>(it->second);
216-
// Write the data to the body encoder with the format expected by the current
217-
// cache version.
218-
DataEncoder body_encoder;
219-
if (!writeMangledNamesAndOffsetsToEncoder(info_id, field_descriptors,
220-
mangled_names, body_encoder))
221-
return;
189+
auto &module = it->second.module;
222190

223-
uint64_t typeref_buffer_size = body_encoder.GetData().size();
224-
llvm::StringRef typeref_buffer((const char *)body_encoder.GetData().data(),
225-
typeref_buffer_size);
191+
auto maybe_pair =
192+
generateHashTableBlob(info_id, field_descriptors, mangled_names);
193+
if (!maybe_pair)
194+
return;
226195

227-
llvm::SmallString<0> compressed_buffer;
228-
llvm::zlib::compress(typeref_buffer, compressed_buffer);
196+
auto &table_offset = maybe_pair->first;
197+
auto &hash_table_blob = maybe_pair->second;
229198

230199
// Write the header followed by the body.
231200
DataEncoder encoder;
232-
encoder.AppendCString(module->GetUUID().GetAsString());
233-
encoder.AppendU64(typeref_buffer_size);
234-
encoder.AppendData(compressed_buffer);
201+
auto uuid = module->GetUUID().GetBytes();
202+
// Append the uuid size followed by the uuid itself.
203+
encoder.AppendU16(uuid.size());
204+
encoder.AppendData(uuid);
205+
206+
207+
auto size_so_far = encoder.GetByteSize();
208+
// The on disk hash table must have a 4-byte alignment, so
209+
// write 0 bytes until we get to the required alignment.
210+
auto padding = llvm::alignTo(size_so_far, 4) - size_so_far;
211+
while (padding-- > 0)
212+
encoder.AppendU8(0);
213+
214+
encoder.AppendU32(table_offset);
215+
encoder.AppendData(hash_table_blob);
235216

236217
auto filename = getTyperefCacheFileNameForModule(module);
237218

@@ -244,24 +225,33 @@ llvm::Optional<swift::remote::FieldDescriptorLocator>
244225
SwiftMetadataCache::getFieldDescriptorLocator(const std::string &Name) {
245226
std::lock_guard<std::recursive_mutex> guard(m_mutex);
246227
Log *log = GetLog(LLDBLog::Types);
247-
auto it = m_mangled_name_to_offset.find(Name);
248-
if (it != m_mangled_name_to_offset.end()) {
249-
LLDB_LOGV(
250-
log,
251-
"[SwiftMetadataCache] Returning field descriptor for mangled name {0}",
252-
Name);
253-
return it->second;
228+
// Compute hash outside of loop as an optimization.
229+
auto hash = m_info.ComputeHash(Name);
230+
for (auto &pair : m_reflection_info_to_module) {
231+
auto &cache_hash_table = pair.second.cache_hash_table;
232+
// No cache for this reflection module.
233+
if (!cache_hash_table)
234+
continue;
235+
auto it = cache_hash_table->find_hashed(Name, hash, &m_info);
236+
if (it != cache_hash_table->end()) {
237+
LLDB_LOGV(log,
238+
"[SwiftMetadataCache] Returning field descriptor for mangled "
239+
"name {0}",
240+
Name);
241+
auto info_id = pair.first;
242+
return {{info_id, *it}};
243+
}
254244
}
255245
return {};
256246
}
257247

258248
bool SwiftMetadataCache::isReflectionInfoCached(uint64_t info_id) {
259249
std::lock_guard<std::recursive_mutex> guard(m_mutex);
260-
auto it = m_info_to_module.find(info_id);
250+
auto it = m_reflection_info_to_module.find(info_id);
261251
// First check if we've registered the reflection info with that id.
262-
if (it != m_info_to_module.end())
263-
// Then check whether we've already parsed it or not.
264-
return std::get<bool>(it->second);
252+
if (it != m_reflection_info_to_module.end())
253+
// Then check whether we have a cache for it or not.
254+
return it->second.cache_hash_table.get() != nullptr;
265255
return false;
266256
}
267257

0 commit comments

Comments
 (0)