Skip to content

Commit 5c0d61e

Browse files
[LTO] Reduce memory usage for import lists (#106772)
This patch reduces the memory usage for import lists by employing memory-efficient data structures.

With this patch, an import list for a given destination module is basically a DenseSet<uint32_t>, with each element indexing into a deduplication table containing tuples of the form {SourceModule, GUID, Definition/Declaration}.

In one of our large applications, the peak memory usage goes down by 9.2% from 6.120GB to 5.555GB during the LTO indexing step.

This patch addresses several sources of space inefficiency associated with std::unordered_map:

- Each element of std::unordered_map<GUID, ImportKind> takes up 16 bytes because of padding, even though ImportKind only carries one bit of information.
- std::unordered_map uses pointers to elements, both in the hash table proper and for collision chains.
- We allocate an instance of std::unordered_map for each {Destination Module, Source Module} pair for which we have at least one import. Most import lists have fewer than 10 imports, so metadata such as the size of the std::unordered_map and the pointer to the hash table costs a lot relative to the actual contents.
1 parent 9ccf825 commit 5c0d61e

File tree

5 files changed

+143
-121
lines changed

5 files changed

+143
-121
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1206,7 +1206,8 @@ static void runThinLTOBackend(
12061206
// We can simply import the values mentioned in the combined index, since
12071207
// we should only invoke this using the individual indexes written out
12081208
// via a WriteIndexesThinBackend.
1209-
FunctionImporter::ImportMapTy ImportList;
1209+
FunctionImporter::ImportIDTable ImportIDs;
1210+
FunctionImporter::ImportMapTy ImportList(ImportIDs);
12101211
if (!lto::initImportList(*M, *CombinedIndex, ImportList))
12111212
return;
12121213

llvm/include/llvm/Transforms/IPO/FunctionImport.h

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include <memory>
2222
#include <string>
2323
#include <system_error>
24-
#include <unordered_map>
25-
#include <unordered_set>
2624
#include <utility>
2725

2826
namespace llvm {
@@ -33,14 +31,6 @@ class Module;
3331
/// based on the provided summary information.
3432
class FunctionImporter {
3533
public:
36-
/// The functions to import from a source module and their import type.
37-
/// Note we choose unordered_map over (Small)DenseMap. The number of imports
38-
/// from a source module could be small but DenseMap size grows to 64 quickly
39-
/// and not memory efficient (see
40-
/// https://llvm.org/docs/ProgrammersManual.html#llvm-adt-densemap-h)
41-
using FunctionsToImportTy =
42-
std::unordered_map<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
43-
4434
/// The different reasons selectCallee will choose not to import a
4535
/// candidate.
4636
enum class ImportFailureReason {
@@ -156,6 +146,12 @@ class FunctionImporter {
156146
return std::make_tuple(FromModule, GUID, Kind);
157147
}
158148

149+
// The same as lookup above. Useful for map_iterator.
150+
std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind>
151+
operator()(ImportIDTable::ImportIDTy ImportID) const {
152+
return lookup(ImportID);
153+
}
154+
159155
private:
160156
// Make a pair of import IDs [Def, Decl] from an index into TheTable.
161157
static std::pair<ImportIDTy, ImportIDTy> makeIDPair(ImportIDTy Index) {
@@ -167,6 +163,9 @@ class FunctionImporter {
167163
MapVector<std::pair<StringRef, GlobalValue::GUID>, ImportIDTy> TheTable;
168164
};
169165

166+
// Forward-declare SortedImportList for ImportMapTy.
167+
class SortedImportList;
168+
170169
/// The map maintains the list of imports. Conceptually, it is a collection
171170
/// of tuples of the form:
172171
///
@@ -179,8 +178,6 @@ class FunctionImporter {
179178
/// path string table).
180179
class ImportMapTy {
181180
public:
182-
using ImportMapTyImpl = DenseMap<StringRef, FunctionsToImportTy>;
183-
184181
enum class AddDefinitionStatus {
185182
// No change was made to the list of imports or whether each import should
186183
// be imported as a declaration or definition.
@@ -192,6 +189,9 @@ class FunctionImporter {
192189
ChangedToDefinition,
193190
};
194191

192+
ImportMapTy() = delete;
193+
ImportMapTy(ImportIDTable &IDs) : IDs(IDs) {}
194+
195195
// Add the given GUID to ImportList as a definition. If the same GUID has
196196
// been added as a declaration previously, that entry is overridden.
197197
AddDefinitionStatus addDefinition(StringRef FromModule,
@@ -215,13 +215,49 @@ class FunctionImporter {
215215
SmallVector<StringRef, 0> getSourceModules() const;
216216

217217
std::optional<GlobalValueSummary::ImportKind>
218-
getImportType(const FunctionsToImportTy &GUIDToImportType,
219-
GlobalValue::GUID GUID) const;
218+
getImportType(StringRef FromModule, GlobalValue::GUID GUID) const;
219+
220+
// Iterate over the import list. The caller gets tuples of FromModule,
221+
// GUID, and ImportKind instead of import IDs.
222+
auto begin() const { return map_iterator(Imports.begin(), IDs); }
223+
auto end() const { return map_iterator(Imports.end(), IDs); }
224+
225+
friend class SortedImportList;
226+
227+
private:
228+
ImportIDTable &IDs;
229+
DenseSet<ImportIDTable::ImportIDTy> Imports;
230+
};
231+
232+
// A read-only copy of ImportMapTy with its contents sorted according to the
233+
// given comparison function.
234+
class SortedImportList {
235+
public:
236+
SortedImportList(const ImportMapTy &ImportMap,
237+
llvm::function_ref<
238+
bool(const std::pair<StringRef, GlobalValue::GUID> &,
239+
const std::pair<StringRef, GlobalValue::GUID> &)>
240+
Comp)
241+
: IDs(ImportMap.IDs), Imports(iterator_range(ImportMap.Imports)) {
242+
llvm::sort(Imports, [&](ImportIDTable::ImportIDTy L,
243+
ImportIDTable::ImportIDTy R) {
244+
auto Lookup = [&](ImportIDTable::ImportIDTy Id)
245+
-> std::pair<StringRef, GlobalValue::GUID> {
246+
auto Tuple = IDs.lookup(Id);
247+
return std::make_pair(std::get<0>(Tuple), std::get<1>(Tuple));
248+
};
249+
return Comp(Lookup(L), Lookup(R));
250+
});
251+
}
220252

221-
const ImportMapTyImpl &getImportMap() const { return ImportMap; }
253+
// Iterate over the import list. The caller gets tuples of FromModule,
254+
// GUID, and ImportKind instead of import IDs.
255+
auto begin() const { return map_iterator(Imports.begin(), IDs); }
256+
auto end() const { return map_iterator(Imports.end(), IDs); }
222257

223258
private:
224-
ImportMapTyImpl ImportMap;
259+
const ImportIDTable &IDs;
260+
SmallVector<ImportIDTable::ImportIDTy, 0> Imports;
225261
};
226262

227263
// A map from destination modules to lists of imports.
@@ -231,7 +267,7 @@ class FunctionImporter {
231267
ImportListsTy(size_t Size) : ListsImpl(Size) {}
232268

233269
ImportMapTy &operator[](StringRef DestMod) {
234-
return ListsImpl.try_emplace(DestMod).first->second;
270+
return ListsImpl.try_emplace(DestMod, ImportIDs).first->second;
235271
}
236272

237273
size_t size() const { return ListsImpl.size(); }
@@ -242,6 +278,7 @@ class FunctionImporter {
242278

243279
private:
244280
DenseMap<StringRef, ImportMapTy> ListsImpl;
281+
ImportIDTable ImportIDs;
245282
};
246283

247284
/// The set contains an entry for every global value that the module exports.

llvm/lib/LTO/LTO.cpp

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -174,51 +174,33 @@ std::string llvm::computeLTOCacheKey(
174174
for (auto GUID : ExportsGUID)
175175
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
176176

177-
// Include the hash for every module we import functions from. The set of
178-
// imported symbols for each module may affect code generation and is
179-
// sensitive to link order, so include that as well.
180-
using ImportMapIteratorTy =
181-
FunctionImporter::ImportMapTy::ImportMapTyImpl::const_iterator;
182-
struct ImportModule {
183-
ImportMapIteratorTy ModIt;
184-
const ModuleSummaryIndex::ModuleInfo *ModInfo;
185-
186-
StringRef getIdentifier() const { return ModIt->getFirst(); }
187-
const FunctionImporter::FunctionsToImportTy &getFunctions() const {
188-
return ModIt->second;
189-
}
190-
191-
const ModuleHash &getHash() const { return ModInfo->second; }
192-
};
193-
194-
std::vector<ImportModule> ImportModulesVector;
195-
ImportModulesVector.reserve(ImportList.getImportMap().size());
196-
197-
for (ImportMapIteratorTy It = ImportList.getImportMap().begin();
198-
It != ImportList.getImportMap().end(); ++It) {
199-
ImportModulesVector.push_back({It, Index.getModule(It->getFirst())});
200-
}
201177
// Order using module hash, to be both independent of module name and
202178
// module order.
203-
llvm::sort(ImportModulesVector,
204-
[](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
205-
return Lhs.getHash() < Rhs.getHash();
206-
});
207-
std::vector<std::pair<uint64_t, uint8_t>> ImportedGUIDs;
208-
for (const ImportModule &Entry : ImportModulesVector) {
209-
auto ModHash = Entry.getHash();
210-
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
211-
212-
AddUint64(Entry.getFunctions().size());
213-
214-
ImportedGUIDs.clear();
215-
for (auto &[Fn, ImportType] : Entry.getFunctions())
216-
ImportedGUIDs.push_back(std::make_pair(Fn, ImportType));
217-
llvm::sort(ImportedGUIDs);
218-
for (auto &[GUID, Type] : ImportedGUIDs) {
219-
AddUint64(GUID);
220-
AddUint8(Type);
179+
auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
180+
const std::pair<StringRef, GlobalValue::GUID> &R) {
181+
return std::make_pair(Index.getModule(L.first)->second, L.second) <
182+
std::make_pair(Index.getModule(R.first)->second, R.second);
183+
};
184+
FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
185+
186+
// Count the number of imports for each source module.
187+
DenseMap<StringRef, unsigned> ModuleToNumImports;
188+
for (const auto &[FromModule, GUID, Type] : SortedImportList)
189+
++ModuleToNumImports[FromModule];
190+
191+
std::optional<StringRef> LastModule;
192+
for (const auto &[FromModule, GUID, Type] : SortedImportList) {
193+
if (LastModule != FromModule) {
194+
// Include the hash for every module we import functions from. The set of
195+
// imported symbols for each module may affect code generation and is
196+
// sensitive to link order, so include that as well.
197+
LastModule = FromModule;
198+
auto ModHash = Index.getModule(FromModule)->second;
199+
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
200+
AddUint64(ModuleToNumImports[FromModule]);
221201
}
202+
AddUint64(GUID);
203+
AddUint8(Type);
222204
}
223205

224206
// Include the hash for the resolved ODR.
@@ -287,16 +269,14 @@ std::string llvm::computeLTOCacheKey(
287269

288270
// Imported functions may introduce new uses of type identifier resolutions,
289271
// so we need to collect their used resolutions as well.
290-
for (const ImportModule &ImpM : ImportModulesVector)
291-
for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
292-
GlobalValueSummary *S =
293-
Index.findSummaryInModule(GUID, ImpM.getIdentifier());
294-
AddUsedThings(S);
295-
// If this is an alias, we also care about any types/etc. that the aliasee
296-
// may reference.
297-
if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
298-
AddUsedThings(AS->getBaseObject());
299-
}
272+
for (const auto &[FromModule, GUID, Type] : SortedImportList) {
273+
GlobalValueSummary *S = Index.findSummaryInModule(GUID, FromModule);
274+
AddUsedThings(S);
275+
// If this is an alias, we also care about any types/etc. that the aliasee
276+
// may reference.
277+
if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
278+
AddUsedThings(AS->getBaseObject());
279+
}
300280

301281
auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
302282
AddString(TId);

0 commit comments

Comments
 (0)