81
81
#include " llvm/Support/raw_ostream.h"
82
82
#include " llvm/Transforms/Scalar.h"
83
83
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84
- #include " llvm/Transforms/Utils/Local.h"
85
84
using namespace llvm ;
86
85
87
86
#define DEBUG_TYPE " tailcallelim"
@@ -93,10 +92,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
93
92
// / Scan the specified function for alloca instructions.
94
93
// / If it contains any dynamic allocas, returns false.
95
94
static bool canTRE (Function &F) {
96
- // TODO: We don't do TRE if dynamic allocas are used.
97
- // Dynamic allocas allocate stack space which should be
98
- // deallocated before new iteration started. That is
99
- // currently not implemented.
95
+ // Because of PR962, we don't TRE dynamic allocas.
100
96
return llvm::all_of (instructions (F), [](Instruction &I) {
101
97
auto *AI = dyn_cast<AllocaInst>(&I);
102
98
return !AI || AI->isStaticAlloca ();
@@ -189,9 +185,11 @@ struct AllocaDerivedValueTracker {
189
185
};
190
186
}
191
187
192
- static bool markTails (Function &F, OptimizationRemarkEmitter *ORE) {
188
+ static bool markTails (Function &F, bool &AllCallsAreTailCalls,
189
+ OptimizationRemarkEmitter *ORE) {
193
190
if (F.callsFunctionThatReturnsTwice ())
194
191
return false ;
192
+ AllCallsAreTailCalls = true ;
195
193
196
194
// The local stack holds all alloca instructions and all byval arguments.
197
195
AllocaDerivedValueTracker Tracker;
@@ -274,8 +272,11 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
274
272
}
275
273
}
276
274
277
- if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers .count (CI))
275
+ if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers .count (CI)) {
278
276
DeferredTails.push_back (CI);
277
+ } else {
278
+ AllCallsAreTailCalls = false ;
279
+ }
279
280
}
280
281
281
282
for (auto *SuccBB : make_range (succ_begin (BB), succ_end (BB))) {
@@ -312,6 +313,8 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
312
313
LLVM_DEBUG (dbgs () << " Marked as tail call candidate: " << *CI << " \n " );
313
314
CI->setTailCall ();
314
315
Modified = true ;
316
+ } else {
317
+ AllCallsAreTailCalls = false ;
315
318
}
316
319
}
317
320
@@ -322,16 +325,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
322
325
// / instruction from after the call to before the call, assuming that all
323
326
// / instructions between the call and this instruction are movable.
324
327
// /
325
- static bool canMoveAboveCall (Instruction *I, CallInst *CI, AliasAnalysis *AA,
326
- DenseMap<Value *, AllocaInst *> &AllocaForValue) {
327
- if (isa<DbgInfoIntrinsic>(I))
328
- return true ;
329
-
330
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
331
- if (II->getIntrinsicID () == Intrinsic::lifetime_end &&
332
- llvm::findAllocaForValue (II->getArgOperand (1 ), AllocaForValue))
333
- return true ;
334
-
328
+ static bool canMoveAboveCall (Instruction *I, CallInst *CI, AliasAnalysis *AA) {
335
329
// FIXME: We can move load/store/call/free instructions above the call if the
336
330
// call does not mod/ref the memory location being processed.
337
331
if (I->mayHaveSideEffects ()) // This also handles volatile loads.
@@ -398,6 +392,7 @@ class TailRecursionEliminator {
398
392
// createTailRecurseLoopHeader the first time we find a call we can eliminate.
399
393
BasicBlock *HeaderBB = nullptr ;
400
394
SmallVector<PHINode *, 8 > ArgumentPHIs;
395
+ bool RemovableCallsMustBeMarkedTail = false ;
401
396
402
397
// PHI node to store our return value.
403
398
PHINode *RetPN = nullptr ;
@@ -419,25 +414,25 @@ class TailRecursionEliminator {
419
414
// The instruction doing the accumulating.
420
415
Instruction *AccumulatorRecursionInstr = nullptr ;
421
416
422
- // The cache for <value, alloca instruction> pairs.
423
- DenseMap<Value *, AllocaInst *> AllocaForValue;
424
-
425
417
TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
426
418
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
427
419
DomTreeUpdater &DTU)
428
420
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
429
421
430
- CallInst *findTRECandidate (Instruction *TI);
422
+ CallInst *findTRECandidate (Instruction *TI,
423
+ bool CannotTailCallElimCallsMarkedTail);
431
424
432
425
void createTailRecurseLoopHeader (CallInst *CI);
433
426
434
427
void insertAccumulator (Instruction *AccRecInstr);
435
428
436
429
bool eliminateCall (CallInst *CI);
437
430
438
- bool foldReturnAndProcessPred (ReturnInst *Ret);
431
+ bool foldReturnAndProcessPred (ReturnInst *Ret,
432
+ bool CannotTailCallElimCallsMarkedTail);
439
433
440
- bool processReturningBlock (ReturnInst *Ret);
434
+ bool processReturningBlock (ReturnInst *Ret,
435
+ bool CannotTailCallElimCallsMarkedTail);
441
436
442
437
void cleanupAndFinalize ();
443
438
@@ -448,7 +443,8 @@ class TailRecursionEliminator {
448
443
};
449
444
} // namespace
450
445
451
- CallInst *TailRecursionEliminator::findTRECandidate (Instruction *TI) {
446
+ CallInst *TailRecursionEliminator::findTRECandidate (
447
+ Instruction *TI, bool CannotTailCallElimCallsMarkedTail) {
452
448
BasicBlock *BB = TI->getParent ();
453
449
454
450
if (&BB->front () == TI) // Make sure there is something before the terminator.
@@ -468,9 +464,9 @@ CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
468
464
--BBI;
469
465
}
470
466
471
- assert ((!CI-> isTailCall () || !CI-> isNoTailCall ()) &&
472
- " Incompatible call site attributes(Tail,NoTail) " );
473
- if (! CI->isTailCall ())
467
+ // If this call is marked as a tail call, and if there are dynamic allocas in
468
+ // the function, we cannot perform this optimization.
469
+ if (CI->isTailCall () && CannotTailCallElimCallsMarkedTail )
474
470
return nullptr ;
475
471
476
472
// As a special case, detect code like this:
@@ -502,13 +498,26 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
502
498
BranchInst *BI = BranchInst::Create (HeaderBB, NewEntry);
503
499
BI->setDebugLoc (CI->getDebugLoc ());
504
500
505
- // Move all fixed sized allocas from HeaderBB to NewEntry.
506
- for (BasicBlock::iterator OEBI = HeaderBB->begin (), E = HeaderBB->end (),
507
- NEBI = NewEntry->begin ();
508
- OEBI != E;)
509
- if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
510
- if (isa<ConstantInt>(AI->getArraySize ()))
511
- AI->moveBefore (&*NEBI);
501
+ // If this function has self recursive calls in the tail position where some
502
+ // are marked tail and some are not, only transform one flavor or another.
503
+ // We have to choose whether we move allocas in the entry block to the new
504
+ // entry block or not, so we can't make a good choice for both. We make this
505
+ // decision here based on whether the first call we found to remove is
506
+ // marked tail.
507
+ // NOTE: We could do slightly better here in the case that the function has
508
+ // no entry block allocas.
509
+ RemovableCallsMustBeMarkedTail = CI->isTailCall ();
510
+
511
+ // If this tail call is marked 'tail' and if there are any allocas in the
512
+ // entry block, move them up to the new entry block.
513
+ if (RemovableCallsMustBeMarkedTail)
514
+ // Move all fixed sized allocas from HeaderBB to NewEntry.
515
+ for (BasicBlock::iterator OEBI = HeaderBB->begin (), E = HeaderBB->end (),
516
+ NEBI = NewEntry->begin ();
517
+ OEBI != E;)
518
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
519
+ if (isa<ConstantInt>(AI->getArraySize ()))
520
+ AI->moveBefore (&*NEBI);
512
521
513
522
// Now that we have created a new block, which jumps to the entry
514
523
// block, insert a PHI node for each argument of the function.
@@ -583,7 +592,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
583
592
Instruction *AccRecInstr = nullptr ;
584
593
BasicBlock::iterator BBI (CI);
585
594
for (++BBI; &*BBI != Ret; ++BBI) {
586
- if (canMoveAboveCall (&*BBI, CI, AA, AllocaForValue ))
595
+ if (canMoveAboveCall (&*BBI, CI, AA))
587
596
continue ;
588
597
589
598
// If we can't move the instruction above the call, it might be because it
@@ -611,6 +620,9 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
611
620
if (!HeaderBB)
612
621
createTailRecurseLoopHeader (CI);
613
622
623
+ if (RemovableCallsMustBeMarkedTail && !CI->isTailCall ())
624
+ return false ;
625
+
614
626
// Ok, now that we know we have a pseudo-entry block WITH all of the
615
627
// required PHI nodes, add entries into the PHI node for the actual
616
628
// parameters passed into the tail-recursive call.
@@ -660,7 +672,8 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
660
672
return true ;
661
673
}
662
674
663
- bool TailRecursionEliminator::foldReturnAndProcessPred (ReturnInst *Ret) {
675
+ bool TailRecursionEliminator::foldReturnAndProcessPred (
676
+ ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
664
677
BasicBlock *BB = Ret->getParent ();
665
678
666
679
bool Change = false ;
@@ -685,7 +698,8 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
685
698
while (!UncondBranchPreds.empty ()) {
686
699
BranchInst *BI = UncondBranchPreds.pop_back_val ();
687
700
BasicBlock *Pred = BI->getParent ();
688
- if (CallInst *CI = findTRECandidate (BI)) {
701
+ if (CallInst *CI =
702
+ findTRECandidate (BI, CannotTailCallElimCallsMarkedTail)) {
689
703
LLVM_DEBUG (dbgs () << " FOLDING: " << *BB
690
704
<< " INTO UNCOND BRANCH PRED: " << *Pred);
691
705
FoldReturnIntoUncondBranch (Ret, BB, Pred, &DTU);
@@ -706,8 +720,9 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
706
720
return Change;
707
721
}
708
722
709
- bool TailRecursionEliminator::processReturningBlock (ReturnInst *Ret) {
710
- CallInst *CI = findTRECandidate (Ret);
723
+ bool TailRecursionEliminator::processReturningBlock (
724
+ ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
725
+ CallInst *CI = findTRECandidate (Ret, CannotTailCallElimCallsMarkedTail);
711
726
if (!CI)
712
727
return false ;
713
728
@@ -795,25 +810,35 @@ bool TailRecursionEliminator::eliminate(Function &F,
795
810
return false ;
796
811
797
812
bool MadeChange = false ;
798
- MadeChange |= markTails (F, ORE);
813
+ bool AllCallsAreTailCalls = false ;
814
+ MadeChange |= markTails (F, AllCallsAreTailCalls, ORE);
815
+ if (!AllCallsAreTailCalls)
816
+ return MadeChange;
799
817
800
818
// If this function is a varargs function, we won't be able to PHI the args
801
819
// right, so don't even try to convert it...
802
820
if (F.getFunctionType ()->isVarArg ())
803
821
return MadeChange;
804
822
805
- if (!canTRE (F))
806
- return MadeChange;
823
+ // If false, we cannot perform TRE on tail calls marked with the 'tail'
824
+ // attribute, because doing so would cause the stack size to increase (real
825
+ // TRE would deallocate variable sized allocas, TRE doesn't).
826
+ bool CanTRETailMarkedCall = canTRE (F);
807
827
808
828
TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
809
829
810
830
// Change any tail recursive calls to loops.
831
+ //
832
+ // FIXME: The code generator produces really bad code when an 'escaping
833
+ // alloca' is changed from being a static alloca to being a dynamic alloca.
834
+ // Until this is resolved, disable this transformation if that would ever
835
+ // happen. This bug is PR962.
811
836
for (Function::iterator BBI = F.begin (), E = F.end (); BBI != E; /* in loop*/ ) {
812
837
BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
813
838
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator ())) {
814
- bool Change = TRE.processReturningBlock (Ret);
839
+ bool Change = TRE.processReturningBlock (Ret, !CanTRETailMarkedCall );
815
840
if (!Change && BB->getFirstNonPHIOrDbg () == Ret)
816
- Change = TRE.foldReturnAndProcessPred (Ret);
841
+ Change = TRE.foldReturnAndProcessPred (Ret, !CanTRETailMarkedCall );
817
842
MadeChange |= Change;
818
843
}
819
844
}
0 commit comments