Skip to content

Commit 3531cc1

Browse files
authored
[PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (#142474)
When a BasicBlock has a large number of allocas and successors, we had to copy the entire IncomingVals and IncomingLocs vectors for each successor. However, updates to IncomingVals and IncomingLocs are infrequent (only a Load/Store into an alloca affects the arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Fixes #142461. On the IR attached to issue #142461, RSS drops from 35 GB to 1.8 GB, while compile time is not affected on average: https://llvm-compile-time-tracker.com/compare.php?from=2e98ed8caa0b47ee79af4ad24b5436a89fe49dfa&to=effac6d1fd600e544f8bc21382c7e541973b1378&stat=instructions:u
1 parent 6760857 commit 3531cc1

File tree

1 file changed

+55
-34
lines changed

1 file changed

+55
-34
lines changed

llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -281,18 +281,48 @@ struct AllocaInfo {
281281
}
282282
};
283283

284+
/// A vector of T in which every write made through set() is logged so that it
/// can later be rolled back with undo().
///
/// Each log entry holds the index that was overwritten and the value it held
/// before the write. undoSize() reports the current log length; handing that
/// length back to undo() later restores every element changed in between.
/// T must be comparable with operator== (used by set()).
template <typename T> class VectorWithUndo {
285+
// Current element values.
SmallVector<T, 8> Vals;
286+
// Log of (index, previous value) pairs, appended in write order.
SmallVector<std::pair<size_t, T>, 8> Undo;
287+
288+
public:
289+
/// Roll back logged writes, newest first, until exactly S log entries
/// remain. S must not exceed the current log length (asserted).
void undo(size_t S) {
290+
assert(S <= Undo.size());
291+
while (S < Undo.size()) {
292+
// Put the previous value back at the index recorded in the newest entry.
Vals[Undo.back().first] = Undo.back().second;
293+
Undo.pop_back();
294+
}
295+
}
296+
297+
/// Resize the value storage to Sz elements. The undo log is not touched.
void resize(size_t Sz) { Vals.resize(Sz); }
298+
299+
/// Number of writes currently recorded in the undo log.
size_t undoSize() const { return Undo.size(); }
300+
301+
/// Read-only element access; this class offers no mutable operator[], so
/// writes go through set() or init().
const T &operator[](size_t Idx) const { return Vals[Idx]; }
302+
303+
/// Store Val at Idx and log the value it replaces. If the element already
/// compares equal to Val, nothing is written and nothing is logged.
void set(size_t Idx, const T &Val) {
304+
if (Vals[Idx] == Val)
305+
return;
306+
// Record the old value before overwriting so undo() can restore it.
Undo.emplace_back(Idx, Vals[Idx]);
307+
Vals[Idx] = Val;
308+
}
309+
310+
/// Store Val at Idx without logging it. Only valid while the undo log is
/// empty (asserted), so a value written here is never rolled back.
void init(size_t Idx, const T &Val) {
311+
assert(Undo.empty());
312+
Vals[Idx] = Val;
313+
}
314+
};
315+
284316
/// Data package used by RenamePass().
285317
struct RenamePassData {
// Diff note: the removed ("-") lines gave every worklist entry its own
// ValVector/LocationVector; the added ("+") lines replace them with two
// undo-log lengths.
286-
using ValVector = std::vector<Value *>;
287-
using LocationVector = std::vector<DebugLoc>;
288-
289-
RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
290-
: BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
318+
// B: block to process; P: block it was reached from (null for the entry
// block pushed by run()); V, L: undo-log lengths to store in UndoVals and
// UndoLocs.
RenamePassData(BasicBlock *B, BasicBlock *P, size_t V, size_t L)
319+
: BB(B), Pred(P), UndoVals(V), UndoLocs(L) {}
291320

292321
// Block handed to RenamePass().
BasicBlock *BB;
293322
// Predecessor handed to RenamePass() along with BB.
BasicBlock *Pred;
294-
ValVector Values;
295-
LocationVector Locations;
323+
324+
// IncomingVals.undoSize() at the time this entry was pushed;
// popFromWorklist() passes it to IncomingVals.undo().
size_t UndoVals;
325+
// IncomingLocs.undoSize() at the time this entry was pushed;
// popFromWorklist() passes it to IncomingLocs.undo().
size_t UndoLocs;
296326
};
297327

298328
/// This assigns and keeps a per-bb relative ordering of load/store
@@ -393,10 +423,10 @@ struct PromoteMem2Reg {
393423
SmallVector<unsigned> BBNumPreds;
394424

395425
/// The state of incoming values for the current DFS step.
396-
RenamePassData::ValVector IncomingVals;
426+
VectorWithUndo<Value *> IncomingVals;
397427

398428
/// The state of incoming locations for the current DFS step.
399-
RenamePassData::LocationVector IncomingLocs;
429+
VectorWithUndo<DebugLoc> IncomingLocs;
400430

401431
// DFS work stack.
402432
SmallVector<RenamePassData, 8> Worklist;
@@ -445,17 +475,16 @@ struct PromoteMem2Reg {
445475
DVRAssignsToDelete.clear();
446476
}
447477

448-
void pushToWorklist(BasicBlock *BB, BasicBlock *Pred,
449-
RenamePassData::ValVector IncVals,
450-
RenamePassData::LocationVector IncLocs) {
451-
Worklist.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs));
478+
/// Append a worklist entry for BB with predecessor Pred. The entry records
/// the current undo-log lengths of IncomingVals and IncomingLocs; the removed
/// version above instead moved whole value/location vectors into the entry.
void pushToWorklist(BasicBlock *BB, BasicBlock *Pred) {
479+
Worklist.emplace_back(BB, Pred, IncomingVals.undoSize(),
480+
IncomingLocs.undoSize());
452481
}
453482

454483
/// Remove the last worklist entry and return it. In the added ("+") version,
/// IncomingVals and IncomingLocs are first rolled back to the undo-log lengths
/// stored in that entry, i.e. to their contents when the entry was pushed.
/// The removed ("-") version instead moved the entry's own vectors into
/// IncomingVals and IncomingLocs.
RenamePassData popFromWorklist() {
455-
RenamePassData R = std::move(Worklist.back());
484+
// The entry now holds only two pointers and two size_t values, so it is
// copied rather than moved.
RenamePassData R = Worklist.back();
456485
Worklist.pop_back();
457-
IncomingVals = std::move(R.Values);
458-
IncomingLocs = std::move(R.Locations);
486+
// Discard every write logged after this entry was pushed.
IncomingVals.undo(R.UndoVals);
487+
IncomingLocs.undo(R.UndoLocs);
459488
return R;
460489
}
461490
};
@@ -871,22 +900,20 @@ void PromoteMem2Reg::run() {
871900
// been stored yet. In this case, it will get this null value.
872901
IncomingVals.resize(Allocas.size());
873902
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
874-
IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType());
903+
IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType()));
875904

876905
// When handling debug info, treat all incoming values as if they have unknown
877906
// locations until proven otherwise.
878907
IncomingLocs.resize(Allocas.size());
879908

880909
// The renamer uses the Visited set to avoid infinite loops.
881-
Visited.resize(F.getMaxBlockNumber());
910+
Visited.resize(F.getMaxBlockNumber(), false);
911+
912+
// Add the entry block to the worklist, with a null predecessor.
913+
pushToWorklist(&F.front(), nullptr);
882914

883-
// Walks all basic blocks in the function performing the SSA rename algorithm
884-
// and inserting the phi nodes we marked as necessary
885-
pushToWorklist(&F.front(), nullptr, std::move(IncomingVals),
886-
std::move(IncomingLocs));
887915
do {
888916
RenamePassData RPD = popFromWorklist();
889-
// RenamePass may add new worklist entries.
890917
RenamePass(RPD.BB, RPD.Pred);
891918
} while (!Worklist.empty());
892919

@@ -1153,7 +1180,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
11531180
APN->setHasNoSignedZeros(true);
11541181

11551182
// The currently active variable for this block is now the PHI.
1156-
IncomingVals[AllocaNo] = APN;
1183+
IncomingVals.set(AllocaNo, APN);
11571184
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
11581185
auto ConvertDbgDeclares = [&](auto &Container) {
11591186
for (auto *DbgItem : Container)
@@ -1211,10 +1238,10 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
12111238

12121239
// what value were we writing?
12131240
unsigned AllocaNo = ai->second;
1214-
IncomingVals[AllocaNo] = SI->getOperand(0);
1241+
IncomingVals.set(AllocaNo, SI->getOperand(0));
12151242

12161243
// Record debuginfo for the store before removing it.
1217-
IncomingLocs[AllocaNo] = SI->getDebugLoc();
1244+
IncomingLocs.set(AllocaNo, SI->getDebugLoc());
12181245
AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, &DbgAssignsToDelete,
12191246
&DVRAssignsToDelete);
12201247
auto ConvertDbgDeclares = [&](auto &Container) {
@@ -1234,14 +1261,8 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
12341261
SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
12351262

12361263
for (BasicBlock *S : reverse(successors(BB)))
1237-
if (VisitedSuccs.insert(S).second) {
1238-
if (VisitedSuccs.size() > 1) {
1239-
// Let the first successor own allocated arrays, other will make a copy.
1240-
IncomingVals = Worklist.back().Values;
1241-
IncomingLocs = Worklist.back().Locations;
1242-
}
1243-
pushToWorklist(S, BB, std::move(IncomingVals), std::move(IncomingLocs));
1244-
}
1264+
if (VisitedSuccs.insert(S).second)
1265+
pushToWorklist(S, BB);
12451266
}
12461267

12471268
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,

0 commit comments

Comments
 (0)