Skip to content

Commit adc0984

Browse files
committed
Reland [SROA] Maintain shadow/backing alloca when some slices are noncapturnig read-only calls to allow alloca partitioning/promotion
This is inspired by the original variant of D109749 by Graham Hunter, but is a more general version. Roughly, instead of promoting the alloca, we call it a shadow/backing alloca, go through all it's slices, clone(!) instructions that operated on it, but make them operate on the cloned alloca, and promote cloned alloca instead. This keeps the shadow/backing alloca, and all the original instructions around, which results in said shadow/backing alloca being a perfect mirror/representation of the promoted alloca's content, so calls that take the alloca as arguments (non-capturingly!) can be supported. For now, we require that the calls also don't modify the alloca's content, but that is only to simplify the initial implementation, and that will be supported in a follow-up. Overall, this leads to *smaller* codesize: https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=size-total and is roughly neutral compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=instructions This relands commit 703240c, that was reverted by commit 7405581, because the assertion `isa<LoadInst>(OrigInstr)` didn't hold in practice, as the newly added test `@select_of_ptrs` shows: If the pointers into alloca are used by select's/PHI's, then even if we manage to fracture the alloca, some sub-alloca's will likely remain. And if there are any non-capturing calls, then we will also decide to keep the original backing alloca around, and we suddenly ~doubled the alloca size, and the amount of memory traffic. I'm not sure if this is a problem or we could live with it, but let's leave that for later... Reviewed By: djtodoro Differential Revision: https://reviews.llvm.org/D113520
1 parent 168fc01 commit adc0984

File tree

2 files changed

+273
-183
lines changed

2 files changed

+273
-183
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 157 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class Slice {
167167
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
168168

169169
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
170+
void setUse(Use *U) { UseAndIsSplittable.setPointer(U); }
170171

171172
bool isDead() const { return getUse() == nullptr; }
172173
void kill() { UseAndIsSplittable.setPointer(nullptr); }
@@ -218,7 +219,7 @@ class Slice {
218219
class llvm::sroa::AllocaSlices {
219220
public:
220221
/// Construct the slices of a particular alloca.
221-
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
222+
AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed);
222223

223224
/// Test whether a pointer to the allocation escapes our analysis.
224225
///
@@ -270,6 +271,12 @@ class llvm::sroa::AllocaSlices {
270271
return DeadUseIfPromotable;
271272
}
272273

274+
void forgetTheDead() {
275+
DeadUsers.clear();
276+
DeadUseIfPromotable.clear();
277+
DeadOperands.clear();
278+
};
279+
273280
/// Access the dead operands referring to this alloca.
274281
///
275282
/// These are operands which have cannot actually be used to refer to the
@@ -295,11 +302,21 @@ class llvm::sroa::AllocaSlices {
295302

296303
friend class AllocaSlices::SliceBuilder;
297304

305+
void formBackingAlloca(AllocaInst *AI, bool &Changed);
306+
298307
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
299308
/// Handle to alloca instruction to simplify method interfaces.
300309
AllocaInst &AI;
301310
#endif
302311

312+
/// Certain escaping uses of an alloca (non-capturing-ones)
313+
/// do not prevent promotion, but force retention of the alloca.
314+
/// This records if there are any such uses.
315+
bool NeedsBackingAlloca = false;
316+
317+
/// Track if there are any `select`s/PHI's involving the alloca pointers.
318+
bool HavePHINodesOrSelectInstrs = false;
319+
303320
/// The instruction responsible for this alloca not having a known set
304321
/// of slices.
305322
///
@@ -1055,18 +1072,35 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
10551072
return;
10561073
}
10571074

1075+
AS.HavePHINodesOrSelectInstrs = true;
1076+
if (AS.NeedsBackingAlloca && AS.HavePHINodesOrSelectInstrs)
1077+
return PI.setAborted(&I);
1078+
10581079
insertUse(I, Offset, Size);
10591080
}
10601081

10611082
void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
10621083

10631084
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
10641085

1086+
void visitCallBase(CallBase &CB) {
1087+
if (!IsOffsetKnown || !CB.doesNotCapture(U->getOperandNo()))
1088+
return PI.setAborted(&CB);
1089+
// If we know that the callee does not retain the pointer,
1090+
// then it does not prevent SROA, although we have to workaround this.
1091+
// However, for now, only allow uses, that, at most, read from said memory.
1092+
if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(U->getOperandNo()))
1093+
return PI.setAborted(&CB);
1094+
AS.NeedsBackingAlloca = true;
1095+
if (AS.NeedsBackingAlloca && AS.HavePHINodesOrSelectInstrs)
1096+
return PI.setAborted(&CB);
1097+
}
1098+
10651099
/// Disable SROA entirely if there are unhandled users of the alloca.
10661100
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
10671101
};
10681102

1069-
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1103+
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed)
10701104
:
10711105
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10721106
AI(AI),
@@ -1083,6 +1117,10 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
10831117
return;
10841118
}
10851119

1120+
// We may have found that the pointer to the AI escapes, but isn't captured.
1121+
if (NeedsBackingAlloca)
1122+
formBackingAlloca(&AI, Changed);
1123+
10861124
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
10871125

10881126
// Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3587,6 +3625,122 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
35873625

35883626
} // end anonymous namespace
35893627

3628+
/// Apparently, we can promote the alloca, but some uses of the alloca
3629+
/// are calls (that don't capture it's address), which require for the
3630+
/// trace alloca to remain. To do so, we must form a new "backing" alloca,
3631+
/// which will be kept as an up-to-date backup of the to-be-promoted-alloca's
3632+
/// content, and used in it's place in these non-capturing calls.
3633+
/// FIXME: support non-readonly non-capturing calls.
3634+
void AllocaSlices::formBackingAlloca(AllocaInst *AllocaToPromote,
3635+
bool &Changed) {
3636+
assert(NeedsBackingAlloca &&
3637+
"Should not be called if there is no need to rewrite.");
3638+
3639+
// We are going to preserve all of the original instructions that were
3640+
// operating on the original alloca, so we must forget any instructions
3641+
// that were deemed as dead-to-be-deleted during normal promotion.
3642+
forgetTheDead();
3643+
3644+
Changed = true;
3645+
3646+
// Now, we want to retain all of the instructions operating on the original
3647+
// alloca, so to avoid much hassle, create a new alloca, and swap (RAUW) them.
3648+
AllocaInst *ShadowAlloca = cast<AllocaInst>(AllocaToPromote->clone());
3649+
ShadowAlloca->takeName(AllocaToPromote);
3650+
AllocaToPromote->setName(ShadowAlloca->getName() + ".prom");
3651+
ShadowAlloca->insertBefore(AllocaToPromote);
3652+
AllocaToPromote->replaceAllUsesWith(ShadowAlloca);
3653+
3654+
// Avoid recomputing the same pointer over and over again, cache it.
3655+
SmallDenseMap<std::pair<uint64_t, Type *>, Value *> RebasedPtrsCSE;
3656+
3657+
// Don't do anything fancy, just put new insts "right after" the alloca.
3658+
IRBuilderTy Builder(AllocaToPromote->getContext());
3659+
BasicBlock *AllocaToPromoteBB = AllocaToPromote->getParent();
3660+
Builder.SetInsertPoint(AllocaToPromoteBB,
3661+
AllocaToPromoteBB->getFirstInsertionPt());
3662+
3663+
// Give a pointer `Offset` bytes into the `AllocaToPromote` with `PtrTy` type.
3664+
auto getRebasedPtr = [&RebasedPtrsCSE, &Builder, AllocaToPromote,
3665+
DL = AllocaToPromote->getModule()->getDataLayout()](
3666+
PointerType *PtrTy, const uint64_t Offset) {
3667+
// Look it up in a cache first.
3668+
auto It = RebasedPtrsCSE.find({Offset, PtrTy});
3669+
if (It != RebasedPtrsCSE.end())
3670+
return It->second;
3671+
3672+
// Otherwise, create a new pointer, and cache it for the future.
3673+
Value *NewPtr = getAdjustedPtr(
3674+
Builder, DL, AllocaToPromote,
3675+
APInt(DL.getIndexSizeInBits(PtrTy->getAddressSpace()), Offset), PtrTy,
3676+
"");
3677+
RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr;
3678+
3679+
return NewPtr;
3680+
};
3681+
3682+
// Some instructions may have several uses of an alloca, and there's
3683+
// a separate slice for each use, so we must cache each instruction
3684+
// we clone, so that we only clone it once,
3685+
// not for each slice that references it.
3686+
SmallDenseMap<Instruction *, Instruction *> InstrCloneMap;
3687+
3688+
// Now, let's just deal with each slice. Roughly, we need to clone each
3689+
// instruction that is referenced by a slice (once per instruction!),
3690+
// and change the appropriate pointer from pointing at the shadow alloca
3691+
// into pointing into the alloca we are going to promote.
3692+
//
3693+
// NOTE: the original instruction is generally preserved,
3694+
// because we need to maintain the content parity between the two allocas!
3695+
for (Slice &S : Slices) {
3696+
// Just completely ignore dead slices.
3697+
if (S.isDead())
3698+
continue;
3699+
3700+
// Which instruction does this slice represent?
3701+
Use *OrigUse = S.getUse();
3702+
auto *OrigInstr = cast<Instruction>(OrigUse->getUser());
3703+
3704+
// Now, we need to make a clone of this instruction, but operating on
3705+
// the alloca-to-be-promoted instead.
3706+
Instruction *ClonedInstr;
3707+
// Only clone instruction once! See if we already did that for this instr.
3708+
auto It = InstrCloneMap.find(OrigInstr);
3709+
if (It != InstrCloneMap.end())
3710+
ClonedInstr = It->second;
3711+
else {
3712+
// This is the first time this instruction is seen.
3713+
// Clone it next to the original instruction, and cache it.
3714+
ClonedInstr = OrigInstr->clone();
3715+
ClonedInstr->insertBefore(OrigInstr);
3716+
InstrCloneMap.insert({OrigInstr, ClonedInstr});
3717+
3718+
// Also, if the instruction was returning anything, we do that instead.
3719+
if (!ClonedInstr->getType()->isVoidTy()) {
3720+
assert(isa<LoadInst>(OrigInstr) &&
3721+
"Not expecting to encounter here anything other than a `load`.");
3722+
ClonedInstr->setName(OrigInstr->getName() + ".prom");
3723+
OrigInstr->replaceAllUsesWith(ClonedInstr);
3724+
}
3725+
3726+
if (isa<LoadInst>(OrigInstr))
3727+
// We know that all the offending (non-capturing) calls do not modify
3728+
// the content of the shadow alloca, so we do not need to propagate
3729+
// the content of the shadow alloca to the alloca-to-be-promoted.
3730+
DeadUsers.push_back(OrigInstr);
3731+
}
3732+
3733+
// Final touch: the slice should refer to the
3734+
// use of the alloca-to-be-promoted, while it currently refers to
3735+
// use of the shadow alloca, so rectify that.
3736+
Value *NewPtr = getRebasedPtr(cast<PointerType>(OrigUse->get()->getType()),
3737+
S.beginOffset());
3738+
Use &ClonedUse = ClonedInstr->getOperandUse(OrigUse->getOperandNo());
3739+
ClonedUse.set(NewPtr);
3740+
S.setUse(&ClonedUse);
3741+
}
3742+
}
3743+
35903744
/// Strip aggregate type wrapping.
35913745
///
35923746
/// This removes no-op aggregate types wrapping an underlying type. It will
@@ -4612,7 +4766,7 @@ bool SROAPass::runOnAlloca(AllocaInst &AI) {
46124766
Changed |= AggRewriter.rewrite(AI);
46134767

46144768
// Build the slices using a recursive instruction-visiting builder.
4615-
AllocaSlices AS(DL, AI);
4769+
AllocaSlices AS(DL, AI, Changed);
46164770
LLVM_DEBUG(AS.print(dbgs()));
46174771
if (AS.isEscaped())
46184772
return Changed;

0 commit comments

Comments
 (0)