Skip to content

Commit e312437

Browse files
committed
[PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg
When a BasicBlock has a large number of allocas and successors, we had to copy the entire IncomingVals and IncomingLocs vectors for each successor. Additional changes to IncomingVals and IncomingLocs are infrequent (only a Load/Store to an alloca affects the arrays). Given the nature of DFS traversal, instead of copying the entire vectors, we can keep track of the changes and undo all changes made by successors. Pull Request: llvm#142474
1 parent 532c89f commit e312437

File tree

1 file changed

+50
-34
lines changed

1 file changed

+50
-34
lines changed

llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -281,18 +281,44 @@ struct AllocaInfo {
281281
}
282282
};
283283

284+
template <typename T> class VectorWithUndo {
285+
SmallVector<T, 8> Vals;
286+
SmallVector<std::pair<size_t, T>, 8> Undo;
287+
288+
public:
289+
void undo(size_t S) {
290+
while (S < Undo.size()) {
291+
Vals[Undo.back().first] = Undo.back().second;
292+
Undo.pop_back();
293+
}
294+
}
295+
296+
void assign(size_t Sz, const T &Val) { Vals.assign(Sz, Val); }
297+
298+
size_t size() const { return Undo.size(); }
299+
300+
const T &operator[](size_t Idx) const { return Vals[Idx]; }
301+
302+
void set(size_t Idx, const T &Val) {
303+
if (Vals[Idx] == Val)
304+
return;
305+
Undo.emplace_back(Idx, Vals[Idx]);
306+
Vals[Idx] = Val;
307+
}
308+
309+
void init(size_t Idx, const T &Val) { Vals[Idx] = Val; }
310+
};
311+
284312
/// Data package used by RenamePass().
285313
struct RenamePassData {
// One entry of the DFS work stack: a block to rename, the predecessor it is
// reached from, and the undo marks to restore when the entry is popped.
286-
using ValVector = std::vector<Value *>;
287-
using LocationVector = std::vector<DebugLoc>;
288-
289-
RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
290-
: BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
314+
// V and L are stored as UndoVals and UndoLocs respectively.
RenamePassData(BasicBlock *B, BasicBlock *P, size_t V, size_t L)
315+
: BB(B), Pred(P), UndoVals(V), UndoLocs(L) {}
291316

292317
// Block that RenamePass() is run on for this entry.
BasicBlock *BB;
293318
// Predecessor through which BB is reached; nullptr for the entry block.
BasicBlock *Pred;
294-
ValVector Values;
295-
LocationVector Locations;
319+
320+
// Mark passed to IncomingVals.undo() when this entry is popped.
size_t UndoVals;
321+
// Mark passed to IncomingLocs.undo() when this entry is popped.
size_t UndoLocs;
296322
};
297323

298324
/// This assigns and keeps a per-bb relative ordering of load/store
@@ -393,10 +419,10 @@ struct PromoteMem2Reg {
393419
SmallVector<unsigned> BBNumPreds;
394420

395421
/// The state of incoming values for the current DFS step.
396-
RenamePassData::ValVector IncomingVals;
422+
VectorWithUndo<Value *> IncomingVals;
397423

398424
/// The state of incoming locations for the current DFS step.
399-
RenamePassData::LocationVector IncomingLocs;
425+
VectorWithUndo<DebugLoc> IncomingLocs;
400426

401427
// DFS work stack.
402428
SmallVector<RenamePassData, 8> Worklist;
@@ -445,17 +471,15 @@ struct PromoteMem2Reg {
445471
DVRAssignsToDelete.clear();
446472
}
447473

448-
void pushToWorklist(BasicBlock *BB, BasicBlock *Pred,
449-
RenamePassData::ValVector IncVals,
450-
RenamePassData::LocationVector IncLocs) {
451-
Worklist.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs));
474+
/// Queues \p BB, reached from \p Pred, on the DFS work stack.
///
/// The entry records the current undo-log length of IncomingVals and of
/// IncomingLocs separately, so that popFromWorklist() can roll each array
/// back to the state it has right now. The two logs grow independently, so
/// each mark must come from its own array.
void pushToWorklist(BasicBlock *BB, BasicBlock *Pred) {
  Worklist.emplace_back(BB, Pred, IncomingVals.size(), IncomingLocs.size());
}
453477

454478
// Removes and returns the last work stack entry. In the added (+) lines the
// entry is copied out and IncomingVals / IncomingLocs are rolled back to the
// undo marks stored in it; the removed (-) lines instead moved whole vectors
// out of the entry.
RenamePassData popFromWorklist() {
455-
RenamePassData R = std::move(Worklist.back());
479+
RenamePassData R = Worklist.back();
456480
Worklist.pop_back();
457-
IncomingVals = std::move(R.Values);
458-
IncomingLocs = std::move(R.Locations);
481+
// Revert every logged change made after this entry was pushed.
IncomingVals.undo(R.UndoVals);
482+
IncomingLocs.undo(R.UndoLocs);
459483
return R;
460484
}
461485
};
@@ -871,22 +895,20 @@ void PromoteMem2Reg::run() {
871895
// been stored yet. In this case, it will get this null value.
872896
IncomingVals.assign(Allocas.size(), nullptr);
873897
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
874-
IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType());
898+
IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType()));
875899

876900
// When handling debug info, treat all incoming values as if they have unknown
877901
// locations until proven otherwise.
878902
IncomingLocs.assign(Allocas.size(), {});
879903

880904
// The renamer uses the Visited set to avoid infinite loops.
881-
Visited.resize(F.getMaxBlockNumber());
905+
Visited.resize(F.getMaxBlockNumber(), false);
906+
907+
// Add the entry block to the worklist, with a null predecessor.
908+
pushToWorklist(&F.front(), nullptr);
882909

883-
// Walks all basic blocks in the function performing the SSA rename algorithm
884-
// and inserting the phi nodes we marked as necessary
885-
pushToWorklist(&F.front(), nullptr, std::move(IncomingVals),
886-
std::move(IncomingLocs));
887910
do {
888911
RenamePassData RPD = popFromWorklist();
889-
// RenamePass may add new worklist entries.
890912
RenamePass(RPD.BB, RPD.Pred);
891913
} while (!Worklist.empty());
892914

@@ -1153,7 +1175,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
11531175
APN->setHasNoSignedZeros(true);
11541176

11551177
// The currently active variable for this block is now the PHI.
1156-
IncomingVals[AllocaNo] = APN;
1178+
IncomingVals.set(AllocaNo, APN);
11571179
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
11581180
auto ConvertDbgDeclares = [&](auto &Container) {
11591181
for (auto *DbgItem : Container)
@@ -1211,10 +1233,10 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
12111233

12121234
// what value were we writing?
12131235
unsigned AllocaNo = ai->second;
1214-
IncomingVals[AllocaNo] = SI->getOperand(0);
1236+
IncomingVals.set(AllocaNo, SI->getOperand(0));
12151237

12161238
// Record debuginfo for the store before removing it.
1217-
IncomingLocs[AllocaNo] = SI->getDebugLoc();
1239+
IncomingLocs.set(AllocaNo, SI->getDebugLoc());
12181240
AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, &DbgAssignsToDelete,
12191241
&DVRAssignsToDelete);
12201242
auto ConvertDbgDeclares = [&](auto &Container) {
@@ -1234,14 +1256,8 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
12341256
SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
12351257

12361258
for (BasicBlock *S : reverse(successors(BB)))
1237-
if (VisitedSuccs.insert(S).second) {
1238-
if (VisitedSuccs.size() > 1) {
1239-
// Let the first successor own allocated arrays, other will make a copy.
1240-
IncomingVals = Worklist.back().Values;
1241-
IncomingLocs = Worklist.back().Locations;
1242-
}
1243-
pushToWorklist(S, BB, std::move(IncomingVals), std::move(IncomingLocs));
1244-
}
1259+
if (VisitedSuccs.insert(S).second)
1260+
pushToWorklist(S, BB);
12451261
}
12461262

12471263
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,

0 commit comments

Comments
 (0)