@@ -167,6 +167,7 @@ class Slice {
167
167
/// Clear the flag stored in the integer half of UseAndIsSplittable, marking
/// this slice as not splittable.
void makeUnsplittable() {
  UseAndIsSplittable.setInt(false);
}
168
168
169
169
/// Return the use stored in the pointer half of UseAndIsSplittable.
/// The result is null once the slice has been killed (see isDead()).
Use *getUse() const {
  Use *const StoredUse = UseAndIsSplittable.getPointer();
  return StoredUse;
}
170
+ void setUse (Use *U) { UseAndIsSplittable.setPointer (U); }
170
171
171
172
/// A slice is dead when it no longer refers to any use.
bool isDead() const {
  return !getUse();
}
172
173
/// Mark this slice dead by resetting its stored use pointer to null.
void kill() {
  UseAndIsSplittable.setPointer(nullptr);
}
@@ -218,7 +219,7 @@ class Slice {
218
219
class llvm ::sroa::AllocaSlices {
219
220
public:
220
221
// / Construct the slices of a particular alloca.
221
- AllocaSlices (const DataLayout &DL, AllocaInst &AI);
222
+ AllocaSlices (const DataLayout &DL, AllocaInst &AI, bool &Changed );
222
223
223
224
// / Test whether a pointer to the allocation escapes our analysis.
224
225
// /
@@ -270,6 +271,12 @@ class llvm::sroa::AllocaSlices {
270
271
return DeadUseIfPromotable;
271
272
}
272
273
274
+ void forgetTheDead () {
275
+ DeadUsers.clear ();
276
+ DeadUseIfPromotable.clear ();
277
+ DeadOperands.clear ();
278
+ };
279
+
273
280
// / Access the dead operands referring to this alloca.
274
281
// /
275
282
// / These are operands which cannot actually be used to refer to the
@@ -295,11 +302,21 @@ class llvm::sroa::AllocaSlices {
295
302
296
303
friend class AllocaSlices ::SliceBuilder;
297
304
305
+ void formBackingAlloca (AllocaInst *AI, bool &Changed);
306
+
298
307
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
299
308
// / Handle to alloca instruction to simplify method interfaces.
300
309
AllocaInst &AI;
301
310
#endif
302
311
312
+ // / Certain escaping uses of an alloca (non-capturing-ones)
313
+ // / do not prevent promotion, but force retention of the alloca.
314
+ // / This records if there are any such uses.
315
+ bool NeedsBackingAlloca = false ;
316
+
317
+ // / Track if there are any `select`s/PHI's involving the alloca pointers.
318
+ bool HavePHINodesOrSelectInstrs = false ;
319
+
303
320
// / The instruction responsible for this alloca not having a known set
304
321
// / of slices.
305
322
// /
@@ -1055,18 +1072,35 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1055
1072
return ;
1056
1073
}
1057
1074
1075
+ AS.HavePHINodesOrSelectInstrs = true ;
1076
+ if (AS.NeedsBackingAlloca && AS.HavePHINodesOrSelectInstrs )
1077
+ return PI.setAborted (&I);
1078
+
1058
1079
insertUse (I, Offset, Size);
1059
1080
}
1060
1081
1061
1082
/// PHI nodes are handled by the shared PHI/select visitor.
void visitPHINode(PHINode &PN) {
  visitPHINodeOrSelectInst(PN);
}
1062
1083
1063
1084
/// Select instructions are handled by the shared PHI/select visitor.
void visitSelectInst(SelectInst &SI) {
  visitPHINodeOrSelectInst(SI);
}
1064
1085
1086
+ void visitCallBase (CallBase &CB) {
1087
+ if (!IsOffsetKnown || !CB.doesNotCapture (U->getOperandNo ()))
1088
+ return PI.setAborted (&CB);
1089
+ // If we know that the callee does not retain the pointer,
1090
+ // then it does not prevent SROA, although we have to workaround this.
1091
+ // However, for now, only allow uses, that, at most, read from said memory.
1092
+ if (!CB.onlyReadsMemory () && !CB.onlyReadsMemory (U->getOperandNo ()))
1093
+ return PI.setAborted (&CB);
1094
+ AS.NeedsBackingAlloca = true ;
1095
+ if (AS.NeedsBackingAlloca && AS.HavePHINodesOrSelectInstrs )
1096
+ return PI.setAborted (&CB);
1097
+ }
1098
+
1065
1099
// / Disable SROA entirely if there are unhandled users of the alloca.
1066
1100
void visitInstruction (Instruction &I) { PI.setAborted (&I); }
1067
1101
};
1068
1102
1069
- AllocaSlices::AllocaSlices (const DataLayout &DL, AllocaInst &AI)
1103
+ AllocaSlices::AllocaSlices (const DataLayout &DL, AllocaInst &AI, bool &Changed )
1070
1104
:
1071
1105
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1072
1106
AI (AI),
@@ -1083,6 +1117,10 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1083
1117
return ;
1084
1118
}
1085
1119
1120
+ // We may have found that the pointer to the AI escapes, but isn't captured.
1121
+ if (NeedsBackingAlloca)
1122
+ formBackingAlloca (&AI, Changed);
1123
+
1086
1124
llvm::erase_if (Slices, [](const Slice &S) { return S.isDead (); });
1087
1125
1088
1126
// Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3587,6 +3625,122 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3587
3625
3588
3626
} // end anonymous namespace
3589
3627
3628
+ // / Apparently, we can promote the alloca, but some uses of the alloca
3629
+ // / are calls (that don't capture it's address), which require for the
3630
+ // / trace alloca to remain. To do so, we must form a new "backing" alloca,
3631
+ // / which will be kept as an up-to-date backup of the to-be-promoted-alloca's
3632
+ // / content, and used in it's place in these non-capturing calls.
3633
+ // / FIXME: support non-readonly non-capturing calls.
3634
+ void AllocaSlices::formBackingAlloca (AllocaInst *AllocaToPromote,
3635
+ bool &Changed) {
3636
+ assert (NeedsBackingAlloca &&
3637
+ " Should not be called if there is no need to rewrite." );
3638
+
3639
+ // We are going to preserve all of the original instructions that were
3640
+ // operating on the original alloca, so we must forget any instructions
3641
+ // that were deemed as dead-to-be-deleted during normal promotion.
3642
+ forgetTheDead ();
3643
+
3644
+ Changed = true ;
3645
+
3646
+ // Now, we want to retain all of the instructions operating on the original
3647
+ // alloca, so to avoid much hassle, create a new alloca, and swap (RAUW) them.
3648
+ AllocaInst *ShadowAlloca = cast<AllocaInst>(AllocaToPromote->clone ());
3649
+ ShadowAlloca->takeName (AllocaToPromote);
3650
+ AllocaToPromote->setName (ShadowAlloca->getName () + " .prom" );
3651
+ ShadowAlloca->insertBefore (AllocaToPromote);
3652
+ AllocaToPromote->replaceAllUsesWith (ShadowAlloca);
3653
+
3654
+ // Avoid recomputing the same pointer over and over again, cache it.
3655
+ SmallDenseMap<std::pair<uint64_t , Type *>, Value *> RebasedPtrsCSE;
3656
+
3657
+ // Don't do anything fancy, just put new insts "right after" the alloca.
3658
+ IRBuilderTy Builder (AllocaToPromote->getContext ());
3659
+ BasicBlock *AllocaToPromoteBB = AllocaToPromote->getParent ();
3660
+ Builder.SetInsertPoint (AllocaToPromoteBB,
3661
+ AllocaToPromoteBB->getFirstInsertionPt ());
3662
+
3663
+ // Give a pointer `Offset` bytes into the `AllocaToPromote` with `PtrTy` type.
3664
+ auto getRebasedPtr = [&RebasedPtrsCSE, &Builder, AllocaToPromote,
3665
+ DL = AllocaToPromote->getModule ()->getDataLayout ()](
3666
+ PointerType *PtrTy, const uint64_t Offset) {
3667
+ // Look it up in a cache first.
3668
+ auto It = RebasedPtrsCSE.find ({Offset, PtrTy});
3669
+ if (It != RebasedPtrsCSE.end ())
3670
+ return It->second ;
3671
+
3672
+ // Otherwise, create a new pointer, and cache it for the future.
3673
+ Value *NewPtr = getAdjustedPtr (
3674
+ Builder, DL, AllocaToPromote,
3675
+ APInt (DL.getIndexSizeInBits (PtrTy->getAddressSpace ()), Offset), PtrTy,
3676
+ " " );
3677
+ RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr;
3678
+
3679
+ return NewPtr;
3680
+ };
3681
+
3682
+ // Some instructions may have several uses of an alloca, and there's
3683
+ // a separate slice for each use, so we must cache each instruction
3684
+ // we clone, so that we only clone it once,
3685
+ // not for each slice that references it.
3686
+ SmallDenseMap<Instruction *, Instruction *> InstrCloneMap;
3687
+
3688
+ // Now, let's just deal with each slice. Roughly, we need to clone each
3689
+ // instruction that is referenced by a slice (once per instruction!),
3690
+ // and change the appropriate pointer from pointing at the shadow alloca
3691
+ // into pointing into the alloca we are going to promote.
3692
+ //
3693
+ // NOTE: the original instruction is generally preserved,
3694
+ // because we need to maintain the content parity between the two allocas!
3695
+ for (Slice &S : Slices) {
3696
+ // Just completely ignore dead slices.
3697
+ if (S.isDead ())
3698
+ continue ;
3699
+
3700
+ // Which instruction does this slice represent?
3701
+ Use *OrigUse = S.getUse ();
3702
+ auto *OrigInstr = cast<Instruction>(OrigUse->getUser ());
3703
+
3704
+ // Now, we need to make a clone of this instruction, but operating on
3705
+ // the alloca-to-be-promoted instead.
3706
+ Instruction *ClonedInstr;
3707
+ // Only clone instruction once! See if we already did that for this instr.
3708
+ auto It = InstrCloneMap.find (OrigInstr);
3709
+ if (It != InstrCloneMap.end ())
3710
+ ClonedInstr = It->second ;
3711
+ else {
3712
+ // This is the first time this instruction is seen.
3713
+ // Clone it next to the original instruction, and cache it.
3714
+ ClonedInstr = OrigInstr->clone ();
3715
+ ClonedInstr->insertBefore (OrigInstr);
3716
+ InstrCloneMap.insert ({OrigInstr, ClonedInstr});
3717
+
3718
+ // Also, if the instruction was returning anything, we do that instead.
3719
+ if (!ClonedInstr->getType ()->isVoidTy ()) {
3720
+ assert (isa<LoadInst>(OrigInstr) &&
3721
+ " Not expecting to encounter here anything other than a `load`." );
3722
+ ClonedInstr->setName (OrigInstr->getName () + " .prom" );
3723
+ OrigInstr->replaceAllUsesWith (ClonedInstr);
3724
+ }
3725
+
3726
+ if (isa<LoadInst>(OrigInstr))
3727
+ // We know that all the offending (non-capturing) calls do not modify
3728
+ // the content of the shadow alloca, so we do not need to propagate
3729
+ // the content of the shadow alloca to the alloca-to-be-promoted.
3730
+ DeadUsers.push_back (OrigInstr);
3731
+ }
3732
+
3733
+ // Final touch: the slice should refer to the
3734
+ // use of the alloca-to-be-promoted, while it currently refers to
3735
+ // use of the shadow alloca, so rectify that.
3736
+ Value *NewPtr = getRebasedPtr (cast<PointerType>(OrigUse->get ()->getType ()),
3737
+ S.beginOffset ());
3738
+ Use &ClonedUse = ClonedInstr->getOperandUse (OrigUse->getOperandNo ());
3739
+ ClonedUse.set (NewPtr);
3740
+ S.setUse (&ClonedUse);
3741
+ }
3742
+ }
3743
+
3590
3744
// / Strip aggregate type wrapping.
3591
3745
// /
3592
3746
// / This removes no-op aggregate types wrapping an underlying type. It will
@@ -4612,7 +4766,7 @@ bool SROAPass::runOnAlloca(AllocaInst &AI) {
4612
4766
Changed |= AggRewriter.rewrite (AI);
4613
4767
4614
4768
// Build the slices using a recursive instruction-visiting builder.
4615
- AllocaSlices AS (DL, AI);
4769
+ AllocaSlices AS (DL, AI, Changed );
4616
4770
LLVM_DEBUG (AS.print (dbgs ()));
4617
4771
if (AS.isEscaped ())
4618
4772
return Changed;
0 commit comments