Skip to content

Commit d7f598c

Browse files
committed
[Utils] Identity map global debug info on first use in CloneFunction*
Summary: To avoid cloning 'global' debug info, CloneFunction implementation used to eagerly identity map a known subset of global debug into into ValueMap's MD map. In larger modules with meaningful volume of debug info this gets very expensive. By passing such global metadata via an IdentityMD set for the ValueMapper to map on first use, we get several benefits: 1. Mapping metadata is not cheap, particularly because of tracking. When cloning a Function we identity map lots of global module-level metadata to avoid cloning it, while only a fraction of it is actually used by the function. Mapping on first use is a lot faster for modules with meaningful amount of debug info. 2. Eagerly identity mapping metadata makes it harder to cache module-level data (e.g. a set of metadata nodes in a \a DICompileUnit). With this patch we can cache certain module-level metadata calculations to speed things up further. Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up CoroSplitPass which is one of the heavier users of cloning: | | Baseline | IdentityMD set | |-----------------+----------+----------------| | CoroSplitPass | 306ms | 221ms | | CoroCloner | 101ms | 72ms | |-----------------+----------+----------------| | Speed up | 1x | 1.4x | Test Plan: ninja check-llvm-unit ninja check-llvm
1 parent 0eefa6a commit d7f598c

File tree

4 files changed

+103
-59
lines changed

4 files changed

+103
-59
lines changed

llvm/include/llvm/Transforms/Utils/Cloning.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
190190
void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
191191
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
192192
ValueMapTypeRemapper *TypeMapper = nullptr,
193-
ValueMaterializer *Materializer = nullptr);
193+
ValueMaterializer *Materializer = nullptr,
194+
const MetadataSetTy *IdentityMD = nullptr);
194195

195196
/// Clone OldFunc's body NewFunct.
196197
void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
@@ -199,7 +200,8 @@ void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
199200
const char *NameSuffix = "",
200201
ClonedCodeInfo *CodeInfo = nullptr,
201202
ValueMapTypeRemapper *TypeMapper = nullptr,
202-
ValueMaterializer *Materializer = nullptr);
203+
ValueMaterializer *Materializer = nullptr,
204+
const MetadataSetTy *IdentityMD = nullptr);
203205

204206
void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
205207
const Instruction *StartingInst,
@@ -230,12 +232,12 @@ DISubprogram *ProcessSubprogramAttachment(const Function &F,
230232
CloneFunctionChangeType Changes,
231233
DebugInfoFinder &DIFinder);
232234

233-
/// Build a map of debug info to use during Metadata cloning.
234-
/// Returns true if cloning would need module level changes and false if there
235-
/// would only be local changes.
236-
bool BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
237-
DebugInfoFinder &DIFinder,
238-
DISubprogram *SPClonedWithinModule);
235+
/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
236+
/// needs to be identity mapped during Metadata cloning.
237+
void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
238+
CloneFunctionChangeType Changes,
239+
DebugInfoFinder &DIFinder,
240+
DISubprogram *SPClonedWithinModule);
239241

240242
/// This class captures the data input to the InlineFunction call, and records
241243
/// the auxiliary results produced by it.

llvm/include/llvm/Transforms/Utils/ValueMapper.h

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
1616

1717
#include "llvm/ADT/ArrayRef.h"
18+
#include "llvm/ADT/SmallPtrSet.h"
1819
#include "llvm/ADT/simple_ilist.h"
1920
#include "llvm/IR/ValueHandle.h"
2021
#include "llvm/IR/ValueMap.h"
@@ -35,6 +36,7 @@ class Value;
3536

3637
using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
3738
using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
39+
using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
3840

3941
/// This is a class that can be implemented by clients to remap types when
4042
/// cloning constants and instructions.
@@ -136,6 +138,18 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
136138
/// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
137139
/// pass into the schedule*() functions.
138140
///
141+
/// NOTE: \c IdentityMD is used by CloneFunction* to directly specify metadata
142+
/// that should be identity mapped (and hence not cloned). The metadata will be
143+
/// identity mapped in \c VM on first use. There are several reasons for doing
144+
/// it this way rather than eagerly identity mapping metadata nodes in \c VM:
145+
/// 1. Mapping metadata is not cheap, particularly because of tracking.
146+
/// 2. When cloning a Function we identity map lots of global module-level
147+
/// metadata to avoid cloning it, while only a fraction of it is actually
148+
/// used by the function. Mapping on first use is a lot faster for modules
149+
/// with meaningful amount of debug info.
150+
/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
151+
/// data (e.g. a set of metadata nodes in a \a DICompileUnit).
152+
///
139153
/// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
140154
/// ValueToValueMapTy. We should template \a ValueMapper (and its
141155
/// implementation classes), and explicitly instantiate on two concrete
@@ -152,7 +166,8 @@ class ValueMapper {
152166
public:
153167
ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
154168
ValueMapTypeRemapper *TypeMapper = nullptr,
155-
ValueMaterializer *Materializer = nullptr);
169+
ValueMaterializer *Materializer = nullptr,
170+
const MetadataSetTy *IdentityMD = nullptr);
156171
ValueMapper(ValueMapper &&) = delete;
157172
ValueMapper(const ValueMapper &) = delete;
158173
ValueMapper &operator=(ValueMapper &&) = delete;
@@ -218,8 +233,10 @@ class ValueMapper {
218233
inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
219234
RemapFlags Flags = RF_None,
220235
ValueMapTypeRemapper *TypeMapper = nullptr,
221-
ValueMaterializer *Materializer = nullptr) {
222-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapValue(*V);
236+
ValueMaterializer *Materializer = nullptr,
237+
const MetadataSetTy *IdentityMD = nullptr) {
238+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
239+
.mapValue(*V);
223240
}
224241

225242
/// Lookup or compute a mapping for a piece of metadata.
@@ -231,7 +248,9 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
231248
/// \c MD.
232249
/// 3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
233250
/// re-wrap its return (returning nullptr on nullptr).
234-
/// 4. Else, \c MD is an \a MDNode. These are remapped, along with their
251+
/// 4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
252+
/// and return it.
253+
/// 5. Else, \c MD is an \a MDNode. These are remapped, along with their
235254
/// transitive operands. Distinct nodes are duplicated or moved depending
236255
/// on \a RF_MoveDistinctNodes. Uniqued nodes are remapped like constants.
237256
///
@@ -240,16 +259,20 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
240259
inline Metadata *MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
241260
RemapFlags Flags = RF_None,
242261
ValueMapTypeRemapper *TypeMapper = nullptr,
243-
ValueMaterializer *Materializer = nullptr) {
244-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMetadata(*MD);
262+
ValueMaterializer *Materializer = nullptr,
263+
const MetadataSetTy *IdentityMD = nullptr) {
264+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
265+
.mapMetadata(*MD);
245266
}
246267

247268
/// Version of MapMetadata with type safety for MDNode.
248269
inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
249270
RemapFlags Flags = RF_None,
250271
ValueMapTypeRemapper *TypeMapper = nullptr,
251-
ValueMaterializer *Materializer = nullptr) {
252-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMDNode(*MD);
272+
ValueMaterializer *Materializer = nullptr,
273+
const MetadataSetTy *IdentityMD = nullptr) {
274+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
275+
.mapMDNode(*MD);
253276
}
254277

255278
/// Convert the instruction operands from referencing the current values into
@@ -263,17 +286,21 @@ inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
263286
inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
264287
RemapFlags Flags = RF_None,
265288
ValueMapTypeRemapper *TypeMapper = nullptr,
266-
ValueMaterializer *Materializer = nullptr) {
267-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapInstruction(*I);
289+
ValueMaterializer *Materializer = nullptr,
290+
const MetadataSetTy *IdentityMD = nullptr) {
291+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
292+
.remapInstruction(*I);
268293
}
269294

270295
/// Remap the Values used in the DbgRecord \a DR using the value map \a
271296
/// VM.
272297
inline void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM,
273298
RemapFlags Flags = RF_None,
274299
ValueMapTypeRemapper *TypeMapper = nullptr,
275-
ValueMaterializer *Materializer = nullptr) {
276-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapDbgRecord(M, *DR);
300+
ValueMaterializer *Materializer = nullptr,
301+
const MetadataSetTy *IdentityMD = nullptr) {
302+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
303+
.remapDbgRecord(M, *DR);
277304
}
278305

279306
/// Remap the Values used in the DbgRecords \a Range using the value map \a
@@ -283,8 +310,9 @@ inline void RemapDbgRecordRange(Module *M,
283310
ValueToValueMapTy &VM,
284311
RemapFlags Flags = RF_None,
285312
ValueMapTypeRemapper *TypeMapper = nullptr,
286-
ValueMaterializer *Materializer = nullptr) {
287-
ValueMapper(VM, Flags, TypeMapper, Materializer)
313+
ValueMaterializer *Materializer = nullptr,
314+
const MetadataSetTy *IdentityMD = nullptr) {
315+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
288316
.remapDbgRecordRange(M, Range);
289317
}
290318

@@ -297,16 +325,19 @@ inline void RemapDbgRecordRange(Module *M,
297325
inline void RemapFunction(Function &F, ValueToValueMapTy &VM,
298326
RemapFlags Flags = RF_None,
299327
ValueMapTypeRemapper *TypeMapper = nullptr,
300-
ValueMaterializer *Materializer = nullptr) {
301-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapFunction(F);
328+
ValueMaterializer *Materializer = nullptr,
329+
const MetadataSetTy *IdentityMD = nullptr) {
330+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD).remapFunction(F);
302331
}
303332

304333
/// Version of MapValue with type safety for Constant.
305334
inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
306335
RemapFlags Flags = RF_None,
307336
ValueMapTypeRemapper *TypeMapper = nullptr,
308-
ValueMaterializer *Materializer = nullptr) {
309-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapConstant(*V);
337+
ValueMaterializer *Materializer = nullptr,
338+
const MetadataSetTy *IdentityMD = nullptr) {
339+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
340+
.mapConstant(*V);
310341
}
311342

312343
} // end namespace llvm

llvm/lib/Transforms/Utils/CloneFunction.cpp

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -153,63 +153,57 @@ DISubprogram *llvm::ProcessSubprogramAttachment(const Function &F,
153153
return SPClonedWithinModule;
154154
}
155155

156-
bool llvm::BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
157-
DebugInfoFinder &DIFinder,
158-
DISubprogram *SPClonedWithinModule) {
159-
bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
156+
void llvm::FindDebugInfoToIdentityMap(MetadataSetTy &MD,
157+
CloneFunctionChangeType Changes,
158+
DebugInfoFinder &DIFinder,
159+
DISubprogram *SPClonedWithinModule) {
160160
if (Changes < CloneFunctionChangeType::DifferentModule &&
161161
DIFinder.subprogram_count() > 0) {
162-
// Turn on module-level changes, since we need to clone (some of) the
163-
// debug info metadata.
162+
// Even if Changes are local only, we turn on module-level changes, since we
163+
// need to clone (some of) the debug info metadata.
164164
//
165165
// FIXME: Metadata effectively owned by a function should be made
166166
// local, and only that local metadata should be cloned.
167-
ModuleLevelChanges = true;
168-
169-
auto mapToSelfIfNew = [&MD](MDNode *N) {
170-
// Avoid clobbering an existing mapping.
171-
(void)MD.try_emplace(N, N);
172-
};
173167

174168
// Avoid cloning types, compile units, and (other) subprograms.
175169
for (DISubprogram *ISP : DIFinder.subprograms()) {
176170
if (ISP != SPClonedWithinModule)
177-
mapToSelfIfNew(ISP);
171+
MD.insert(ISP);
178172
}
179173

180174
// If a subprogram isn't going to be cloned skip its lexical blocks as well.
181175
for (DIScope *S : DIFinder.scopes()) {
182176
auto *LScope = dyn_cast<DILocalScope>(S);
183177
if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
184-
mapToSelfIfNew(S);
178+
MD.insert(S);
185179
}
186180

187181
for (DICompileUnit *CU : DIFinder.compile_units())
188-
mapToSelfIfNew(CU);
182+
MD.insert(CU);
189183

190184
for (DIType *Type : DIFinder.types())
191-
mapToSelfIfNew(Type);
185+
MD.insert(Type);
192186
} else {
193187
assert(!SPClonedWithinModule &&
194188
"Subprogram should be in DIFinder->subprogram_count()...");
195189
}
196-
197-
return ModuleLevelChanges;
198190
}
199191

200192
void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
201193
ValueToValueMapTy &VMap,
202194
RemapFlags RemapFlag,
203195
ValueMapTypeRemapper *TypeMapper,
204-
ValueMaterializer *Materializer) {
196+
ValueMaterializer *Materializer,
197+
const MetadataSetTy *IdentityMD) {
205198
// Duplicate the metadata that is attached to the cloned function.
206199
// Subprograms/CUs/types that were already mapped to themselves won't be
207200
// duplicated.
208201
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
209202
OldFunc->getAllMetadata(MDs);
210203
for (auto MD : MDs) {
211-
NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
212-
TypeMapper, Materializer));
204+
NewFunc->addMetadata(MD.first,
205+
*MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
206+
Materializer, IdentityMD));
213207
}
214208
}
215209

@@ -219,7 +213,8 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
219213
const char *NameSuffix,
220214
ClonedCodeInfo *CodeInfo,
221215
ValueMapTypeRemapper *TypeMapper,
222-
ValueMaterializer *Materializer) {
216+
ValueMaterializer *Materializer,
217+
const MetadataSetTy *IdentityMD) {
223218
if (OldFunc->isDeclaration())
224219
return;
225220

@@ -260,9 +255,10 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
260255
// Loop over all instructions, fixing each one as we find it, and any
261256
// attached debug-info records.
262257
for (Instruction &II : *BB) {
263-
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
258+
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer,
259+
IdentityMD);
264260
RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
265-
RemapFlag, TypeMapper, Materializer);
261+
RemapFlag, TypeMapper, Materializer, IdentityMD);
266262
}
267263
}
268264

@@ -325,16 +321,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
325321
DISubprogram *SPClonedWithinModule =
326322
ProcessSubprogramAttachment(*OldFunc, Changes, DIFinder);
327323

328-
ModuleLevelChanges =
329-
BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
324+
MetadataSetTy IdentityMD;
325+
FindDebugInfoToIdentityMap(IdentityMD, Changes, DIFinder,
326+
SPClonedWithinModule);
330327

331-
const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
328+
// Current implementation always upgrades from local changes to module level
329+
// changes due to the way metadata cloning is done. See
330+
// BuildDebugInfoToIdentityMap for more details.
331+
const auto RemapFlag = RF_None;
332332

333333
CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
334-
Materializer);
334+
Materializer, &IdentityMD);
335335

336336
CloneFunctionBodyInto(NewFunc, OldFunc, VMap, RemapFlag, Returns, NameSuffix,
337-
CodeInfo, TypeMapper, Materializer);
337+
CodeInfo, TypeMapper, Materializer, &IdentityMD);
338338

339339
// Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
340340
// same module, the compile unit will already be listed (or not). When

llvm/lib/Transforms/Utils/ValueMapper.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,14 @@ class Mapper {
120120
SmallVector<WorklistEntry, 4> Worklist;
121121
SmallVector<DelayedBasicBlock, 1> DelayedBBs;
122122
SmallVector<Constant *, 16> AppendingInits;
123+
const MetadataSetTy *IdentityMD;
123124

124125
public:
125126
Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
126-
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
127+
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer,
128+
const MetadataSetTy *IdentityMD)
127129
: Flags(Flags), TypeMapper(TypeMapper),
128-
MCs(1, MappingContext(VM, Materializer)) {}
130+
MCs(1, MappingContext(VM, Materializer)), IdentityMD(IdentityMD) {}
129131

130132
/// ValueMapper should explicitly call \a flush() before destruction.
131133
~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
@@ -900,6 +902,14 @@ std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
900902
return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
901903
}
902904

905+
// Map metadata from IdentityMD on first use. We need to add these nodes to
906+
// the mapping as otherwise metadata nodes numbering gets messed up. This is
907+
// still economical because the amount of data in IdentityMD may be a lot
908+
// larger than what will actually get used.
909+
if (IdentityMD && IdentityMD->contains(MD)) {
910+
return getVM().MD()[MD] = TrackingMDRef(const_cast<Metadata *>(MD));
911+
}
912+
903913
assert(isa<MDNode>(MD) && "Expected a metadata node");
904914

905915
return std::nullopt;
@@ -1199,8 +1209,9 @@ class FlushingMapper {
11991209

12001210
ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
12011211
ValueMapTypeRemapper *TypeMapper,
1202-
ValueMaterializer *Materializer)
1203-
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
1212+
ValueMaterializer *Materializer,
1213+
const MetadataSetTy *IdentityMD)
1214+
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer, IdentityMD)) {}
12041215

12051216
ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
12061217

0 commit comments

Comments
 (0)