@@ -761,27 +761,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       // Detect cases where we're performing call slot forwarding, but
       // happen to be using a load-store pair to implement it, rather than
       // a memcpy.
-      CallInst *C = nullptr;
-      if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
-              MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
-        // The load most post-dom the call. Limit to the same block for now.
-        // TODO: Support non-local call-slot optimization?
-        if (LoadClobber->getBlock() == SI->getParent())
-          C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
-      }
-
-      if (C) {
-        bool changed = performCallSlotOptzn(
-            LI, SI, SI->getPointerOperand()->stripPointerCasts(),
-            LI->getPointerOperand()->stripPointerCasts(),
-            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            commonAlignment(SI->getAlign(), LI->getAlign()), C);
-        if (changed) {
-          eraseInstruction(SI);
-          eraseInstruction(LI);
-          ++NumMemCpyInstr;
-          return true;
-        }
+      auto GetCall = [&]() -> CallInst * {
+        // We defer this expensive clobber walk until the cheap checks
+        // have been done on the source inside performCallSlotOptzn.
+        if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+                MSSA->getWalker()->getClobberingMemoryAccess(LI)))
+          return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+        return nullptr;
+      };
+
+      bool changed = performCallSlotOptzn(
+          LI, SI, SI->getPointerOperand()->stripPointerCasts(),
+          LI->getPointerOperand()->stripPointerCasts(),
+          DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+          commonAlignment(SI->getAlign(), LI->getAlign()), GetCall);
+      if (changed) {
+        eraseInstruction(SI);
+        eraseInstruction(LI);
+        ++NumMemCpyInstr;
+        return true;
       }
     }
   }
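
The shape of this hunk is a classic laziness refactor: pass a callable instead of a precomputed value, so the expensive MemorySSA clobber walk only runs once the callee's cheap structural checks have passed. A minimal standalone sketch of the same pattern, with hypothetical names (tryCallSlot, expensiveClobberWalk) standing in for performCallSlotOptzn and the walk; this is an illustration, not the LLVM code itself:

#include <cstdio>
#include <functional>

struct Call {}; // stand-in for llvm::CallInst

// Hypothetical stand-in for the MemorySSA clobber walk: the expensive
// step we want to skip whenever a cheap check already fails.
static Call *expensiveClobberWalk() {
  std::puts("expensive clobber walk ran");
  static Call TheCall;
  return &TheCall;
}

// Shaped like performCallSlotOptzn after this patch: cheap checks run
// first, and the callable is only invoked once they all pass.
static bool tryCallSlot(bool cheapChecksPass, std::function<Call *()> getCall) {
  if (!cheapChecksPass)
    return false;        // bail out without ever paying for the walk
  Call *C = getCall();   // deferred until here
  return C != nullptr;
}

int main() {
  tryCallSlot(false, expensiveClobberWalk); // prints nothing
  tryCallSlot(true, expensiveClobberWalk);  // prints once
  return 0;
}

Capturing by reference in the patch's GetCall is safe here because performCallSlotOptzn invokes the callable before it returns, so the lambda never outlives the values it captures.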
@@ -856,7 +854,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                          Instruction *cpyStore, Value *cpyDest,
                                          Value *cpySrc, TypeSize cpySize,
-                                         Align cpyAlign, CallInst *C) {
+                                         Align cpyAlign,
+                                         std::function<CallInst *()> GetC) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
@@ -875,11 +874,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
   if (cpySize.isScalable())
     return false;
 
-  // Lifetime marks shouldn't be operated on.
-  if (Function *F = C->getCalledFunction())
-    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
-      return false;
-
   // Require that src be an alloca. This simplifies the reasoning considerably.
   auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
@@ -896,6 +890,16 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
   if (cpySize < srcSize)
     return false;
 
+  CallInst *C = GetC();
+  if (!C)
+    return false;
+
+  // Lifetime marks shouldn't be operated on.
+  if (Function *F = C->getCalledFunction())
+    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
+      return false;
+
+
   if (C->getParent() != cpyStore->getParent()) {
     LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
     return false;
@@ -1459,7 +1463,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       if (performCallSlotOptzn(
               M, M, M->getDest(), M->getSource(),
               TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
-              C)) {
+              [C]() -> CallInst * { return C; })) {
         LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                           << "    call: " << *C << "\n"
                           << "    memcpy: " << *M << "\n");