Skip to content

Commit b672c77

Browse files
authored
Merge pull request #5154 from augusto2112/swift-persistent-meta-cache
[lldb] Use on disk hash table as the binary format of SwiftMetadataCache
2 parents b69b0f8 + 4bdb9ca commit b672c77

File tree

3 files changed

+210
-128
lines changed

3 files changed

+210
-128
lines changed

lldb/source/Core/CoreProperties.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ let Definition = "modulelist" in {
3232
Desc<"The module loading mode to use when loading modules for Swift.">;
3333
def EnableSwiftMetadataCache: Property<"enable-swift-metadata-cache", "Boolean">,
3434
Global,
35-
DefaultFalse,
35+
DefaultTrue,
3636
Desc<"Enable caching for Swift reflection metadata in LLDB.">;
3737
def SwiftMetadataCachePath: Property<"swift-metadata-cache-path", "FileSpec">,
3838
Global,

lldb/source/Plugins/LanguageRuntime/Swift/SwiftMetadataCache.cpp

Lines changed: 95 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,9 @@
22

33
#include "lldb/Utility/DataEncoder.h"
44
#include "lldb/Utility/LLDBLog.h"
5-
#include "lldb/Utility/Log.h"
65
#include "lldb/Version/Version.h"
7-
#include "llvm/CodeGen/AccelTable.h"
8-
#include "llvm/CodeGen/AsmPrinter.h"
96
#include "llvm/Support/BLAKE3.h"
107
#include "llvm/Support/CachePruning.h"
11-
#include "llvm/Support/Compression.h"
128

139
using namespace lldb;
1410
using namespace lldb_private;
@@ -32,9 +28,7 @@ SwiftMetadataCache::SwiftMetadataCache() {
3228
}
3329
}
3430

35-
bool SwiftMetadataCache::is_enabled() {
36-
return llvm::zlib::isAvailable() && m_data_file_cache.hasValue();
37-
}
31+
bool SwiftMetadataCache::is_enabled() { return m_data_file_cache.hasValue(); }
3832

3933
void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
4034
uint64_t info_id) {
@@ -44,7 +38,8 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
4438
if (!is_enabled())
4539
return;
4640

47-
m_info_to_module[info_id] = {module, false};
41+
/// Insert the module cache info as not processed.
42+
m_reflection_info_to_module.insert({info_id, module});
4843

4944
// Attempt to load the cached file.
5045
auto module_name = getTyperefCacheFileNameForModule(module);
@@ -57,17 +52,32 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
5752
return;
5853
}
5954

55+
// Move it to the instance variable so references to this data don't go
56+
// out of scope.
57+
m_hash_table_buffers.emplace_back(std::move(mem_buffer_up));
58+
auto &mem_buffer = m_hash_table_buffers.back();
59+
6060
// Extractor used to extract the header information (see the .h file for
6161
// details on the format).
62-
DataExtractor header_extractor(mem_buffer_up->getBufferStart(),
63-
mem_buffer_up->getBufferSize(),
62+
DataExtractor header_extractor(mem_buffer->getBufferStart(),
63+
mem_buffer->getBufferSize(),
6464
module->GetObjectFile()->GetByteOrder(),
6565
module->GetObjectFile()->GetAddressByteSize());
6666

6767
lldb::offset_t read_offset = 0;
68+
uint16_t cached_UUID_size = 0;
69+
if (!header_extractor.GetU16(&read_offset, &cached_UUID_size, 1)) {
70+
LLDB_LOG(log,
71+
"[SwiftMetadataCache] Failed to read cached UUID size for module {0}.",
72+
module->GetFileSpec().GetFilename());
73+
m_data_file_cache->RemoveCacheFile(module_name);
74+
return;
75+
}
6876

69-
std::string UUID = module->GetUUID().GetAsString();
70-
std::string cached_UUID = header_extractor.GetCStr(&read_offset);
77+
const auto *cached_UUID_data = reinterpret_cast<const uint8_t *>(
78+
header_extractor.GetData(&read_offset, cached_UUID_size));
79+
80+
llvm::ArrayRef<uint8_t> cached_UUID(cached_UUID_data, cached_UUID_size);
7181
// If no uuid in the file something is wrong with the cache.
7282
if (cached_UUID.empty()) {
7383
LLDB_LOG(log,
@@ -77,6 +87,7 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
7787
return;
7888
}
7989

90+
auto UUID = module->GetUUID().GetBytes();
8091
// If the UUIDs don't match this is most likely a stale cache.
8192
if (cached_UUID != UUID) {
8293
LLDB_LOGV(log, "[SwiftMetadataCache] Module UUID mismatch for {0}.",
@@ -85,73 +96,31 @@ void SwiftMetadataCache::registerModuleWithReflectionInfoID(ModuleSP module,
8596
return;
8697
}
8798

88-
uint64_t expanded_size = 0;
89-
if (!header_extractor.GetU64(&read_offset, &expanded_size, 1)) {
99+
// The on disk hash table must have a 4-byte alignment, skip
100+
// the padding when reading.
101+
read_offset = llvm::alignTo(read_offset, 4);
102+
103+
// The offset of the hash table control structure, which follows the payload.
104+
uint32_t table_control_offset = 0;
105+
if (!header_extractor.GetU32(&read_offset, &table_control_offset, 1)) {
90106
LLDB_LOGV(log,
91-
"[SwiftMetadataCache] Failed to read decompressed cache size for "
107+
"[SwiftMetadataCache] Failed to read table offset for "
92108
"module {0}.",
93109
module->GetFileSpec().GetFilename());
94110
m_data_file_cache->RemoveCacheFile(module_name);
95111
return;
96112
}
97113

98-
const auto *start = (const char *)header_extractor.GetData(&read_offset, 0);
99-
// Create a reference to the compressed data.
100-
llvm::StringRef string_buffer(start, (uint64_t)mem_buffer_up->getBufferEnd() -
101-
(uint64_t)start);
102-
103-
llvm::SmallString<0> decompressed;
104-
auto error =
105-
llvm::zlib::uncompress(string_buffer, decompressed, expanded_size);
106-
if (error) {
107-
auto error_string = llvm::toString(std::move(error));
108-
LLDB_LOG(log,
109-
"[SwiftMetadataCache] Cache decompression failed with error: {0}. "
110-
"Deleting cached file.",
111-
error_string);
112-
m_data_file_cache->RemoveCacheFile(module_name);
113-
return;
114-
}
114+
const auto *table_contents = reinterpret_cast<const uint8_t *>(
115+
header_extractor.GetData(&read_offset, 0));
115116

116-
// Extractor to extract the body of the cached file (see SwiftMetadataCache.h
117-
// for more details of the format).
118-
DataExtractor body_extractor(decompressed.data(), decompressed.size(),
119-
module->GetObjectFile()->GetByteOrder(),
120-
module->GetObjectFile()->GetAddressByteSize());
121-
read_offset = 0;
122-
auto num_entries = body_extractor.GetU64(&read_offset);
123-
124-
// Map to extract the encoded data to. Since extraction can fail we don't want
125-
// to insert values into the final map in case we have to abort midway.
126-
llvm::StringMap<swift::remote::FieldDescriptorLocator> temp_map;
127-
for (size_t i = 0; i < num_entries; i++) {
128-
const auto *mangled_name = body_extractor.GetCStr(&read_offset);
129-
if (!mangled_name) {
130-
LLDB_LOG(log,
131-
"[SwiftMetadataCache] Failed to read mangled name {0} at offset "
132-
"{1} for module {2}.",
133-
i, read_offset, module->GetFileSpec().GetFilename());
134-
m_data_file_cache->RemoveCacheFile(module_name);
135-
return;
136-
}
137-
uint64_t offset = 0;
138-
if (!body_extractor.GetU64(&read_offset, &offset, 1)) {
139-
LLDB_LOG(log,
140-
"[SwiftMetadataCache] Failed to read mangled name {0} at offset "
141-
"{1} for module {2}.",
142-
i, read_offset, module->GetFileSpec().GetFilename());
143-
m_data_file_cache->RemoveCacheFile(module_name);
144-
return;
145-
}
146-
temp_map[mangled_name] = {info_id, offset};
147-
}
117+
const auto *table_control = table_contents + table_control_offset;
148118

149-
// Move the values to the actual map now that we know that it's safe.
150-
for (auto &p : temp_map)
151-
m_mangled_name_to_offset.try_emplace(p.getKey(), p.second);
119+
// Store the hash table.
120+
m_reflection_info_to_module.find(info_id)->second.cache_hash_table.reset(
121+
llvm::OnDiskChainedHashTable<TypeRefInfo>::Create(
122+
table_control, table_contents, m_info));
152123

153-
// Mark this reflection info as processed.
154-
m_info_to_module[info_id] = {module, true};
155124
LLDB_LOGV(log, "[SwiftMetadataCache] Loaded cache for module {0}.",
156125
module->GetFileSpec().GetFilename());
157126
}
@@ -167,33 +136,38 @@ static bool areMangledNamesAndFieldSectionSameSize(
167136
return field_descriptors_size == mangled_names.size();
168137
}
169138

170-
bool SwiftMetadataCache::writeMangledNamesAndOffsetsToEncoder(
139+
llvm::Optional<std::pair<uint32_t, llvm::SmallString<32>>>
140+
SwiftMetadataCache::generateHashTableBlob(
171141
uint64_t info_id, const swift::reflection::FieldSection &field_descriptors,
172-
const std::vector<std::string> &mangled_names, DataEncoder &encoder) {
142+
const std::vector<std::string> &mangled_names) {
173143
Log *log = GetLog(LLDBLog::Types);
174-
auto num_entries = mangled_names.size();
175-
encoder.AppendU64(num_entries);
144+
llvm::SmallString<32> hash_table_blob;
145+
llvm::raw_svector_ostream blobStream(hash_table_blob);
176146

177147
// If the amount of mangled names and field descriptors don't match something
178148
// unexpected happened.
179149
if (!areMangledNamesAndFieldSectionSameSize(field_descriptors,
180150
mangled_names)) {
181151
LLDB_LOG(log, "[SwiftMetadataCache] Mismatch between number of mangled "
182152
"names and field descriptors passed in.");
183-
return false;
153+
return {};
184154
}
185155

156+
llvm::OnDiskChainedHashTableGenerator<TypeRefInfo> table_generator;
186157
for (auto pair : llvm::zip(field_descriptors, mangled_names)) {
187158
auto field_descriptor = std::get<0>(pair);
188159
auto &mangled_name = std::get<1>(pair);
189160
if (mangled_name.empty())
190161
continue;
191162
auto offset = field_descriptor.getAddressData() -
192163
field_descriptors.startAddress().getAddressData();
193-
encoder.AppendCString(mangled_name.data());
194-
encoder.AppendU64(offset);
164+
table_generator.insert(mangled_name, offset, m_info);
195165
}
196-
return true;
166+
167+
// Make sure that no bucket is at offset 0.
168+
llvm::support::endian::write<uint32_t>(blobStream, 0, llvm::support::little);
169+
uint32_t table_control_offset = table_generator.Emit(blobStream, m_info);
170+
return {{std::move(table_control_offset), std::move(hash_table_blob)}};
197171
}
198172

199173
void SwiftMetadataCache::cacheFieldDescriptors(
@@ -205,33 +179,40 @@ void SwiftMetadataCache::cacheFieldDescriptors(
205179
if (!is_enabled())
206180
return;
207181

208-
auto it = m_info_to_module.find(info_id);
209-
if (it == m_info_to_module.end()) {
182+
auto it = m_reflection_info_to_module.find(info_id);
183+
if (it == m_reflection_info_to_module.end()) {
210184
LLDB_LOGV(log, "[SwiftMetadataCache] No module found with module id {0}.",
211185
info_id);
212186
return;
213187
}
214188

215-
auto module = std::get<ModuleSP>(it->second);
216-
// Write the data to the body encoder with the format expected by the current
217-
// cache version.
218-
DataEncoder body_encoder;
219-
if (!writeMangledNamesAndOffsetsToEncoder(info_id, field_descriptors,
220-
mangled_names, body_encoder))
221-
return;
189+
auto &module = it->second.module;
222190

223-
uint64_t typeref_buffer_size = body_encoder.GetData().size();
224-
llvm::StringRef typeref_buffer((const char *)body_encoder.GetData().data(),
225-
typeref_buffer_size);
191+
auto maybe_pair =
192+
generateHashTableBlob(info_id, field_descriptors, mangled_names);
193+
if (!maybe_pair)
194+
return;
226195

227-
llvm::SmallString<0> compressed_buffer;
228-
llvm::zlib::compress(typeref_buffer, compressed_buffer);
196+
auto &table_offset = maybe_pair->first;
197+
auto &hash_table_blob = maybe_pair->second;
229198

230199
// Write the header followed by the body.
231200
DataEncoder encoder;
232-
encoder.AppendCString(module->GetUUID().GetAsString());
233-
encoder.AppendU64(typeref_buffer_size);
234-
encoder.AppendData(compressed_buffer);
201+
auto uuid = module->GetUUID().GetBytes();
202+
// Append the uuid size followed by the uuid itself.
203+
encoder.AppendU16(uuid.size());
204+
encoder.AppendData(uuid);
205+
206+
207+
auto size_so_far = encoder.GetByteSize();
208+
// The on disk hash table must have a 4-byte alignment, so
209+
// write 0 bytes until we get to the required alignment.
210+
auto padding = llvm::alignTo(size_so_far, 4) - size_so_far;
211+
while (padding-- > 0)
212+
encoder.AppendU8(0);
213+
214+
encoder.AppendU32(table_offset);
215+
encoder.AppendData(hash_table_blob);
235216

236217
auto filename = getTyperefCacheFileNameForModule(module);
237218

@@ -244,24 +225,33 @@ llvm::Optional<swift::remote::FieldDescriptorLocator>
244225
SwiftMetadataCache::getFieldDescriptorLocator(const std::string &Name) {
245226
std::lock_guard<std::recursive_mutex> guard(m_mutex);
246227
Log *log = GetLog(LLDBLog::Types);
247-
auto it = m_mangled_name_to_offset.find(Name);
248-
if (it != m_mangled_name_to_offset.end()) {
249-
LLDB_LOGV(
250-
log,
251-
"[SwiftMetadataCache] Returning field descriptor for mangled name {0}",
252-
Name);
253-
return it->second;
228+
// Compute hash outside of loop as an optimization.
229+
auto hash = m_info.ComputeHash(Name);
230+
for (auto &pair : m_reflection_info_to_module) {
231+
auto &cache_hash_table = pair.second.cache_hash_table;
232+
// No cache for this reflection module.
233+
if (!cache_hash_table)
234+
continue;
235+
auto it = cache_hash_table->find_hashed(Name, hash, &m_info);
236+
if (it != cache_hash_table->end()) {
237+
LLDB_LOGV(log,
238+
"[SwiftMetadataCache] Returning field descriptor for mangled "
239+
"name {0}",
240+
Name);
241+
auto info_id = pair.first;
242+
return {{info_id, *it}};
243+
}
254244
}
255245
return {};
256246
}
257247

258248
bool SwiftMetadataCache::isReflectionInfoCached(uint64_t info_id) {
259249
std::lock_guard<std::recursive_mutex> guard(m_mutex);
260-
auto it = m_info_to_module.find(info_id);
250+
auto it = m_reflection_info_to_module.find(info_id);
261251
// First check if we've registered the reflection info with that id.
262-
if (it != m_info_to_module.end())
263-
// Then check whether we've already parsed it or not.
264-
return std::get<bool>(it->second);
252+
if (it != m_reflection_info_to_module.end())
253+
// Then check whether we have a cache for it or not.
254+
return it->second.cache_hash_table.get() != nullptr;
265255
return false;
266256
}
267257

0 commit comments

Comments
 (0)