Skip to content

Commit 0a01fc9

Browse files
committed
Revert "[TRE] allow TRE for non-capturing calls."
This reverts commit f7907e9. That commit caused error on multi-stage build.
1 parent 572c290 commit 0a01fc9

File tree

4 files changed

+72
-247
lines changed

4 files changed

+72
-247
lines changed

llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp

Lines changed: 69 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@
8181
#include "llvm/Support/raw_ostream.h"
8282
#include "llvm/Transforms/Scalar.h"
8383
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
84-
#include "llvm/Transforms/Utils/Local.h"
8584
using namespace llvm;
8685

8786
#define DEBUG_TYPE "tailcallelim"
@@ -93,10 +92,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
9392
/// Scan the specified function for alloca instructions.
9493
/// If it contains any dynamic allocas, returns false.
9594
static bool canTRE(Function &F) {
96-
// TODO: We don't do TRE if dynamic allocas are used.
97-
// Dynamic allocas allocate stack space which should be
98-
// deallocated before new iteration started. That is
99-
// currently not implemented.
95+
// Because of PR962, we don't TRE dynamic allocas.
10096
return llvm::all_of(instructions(F), [](Instruction &I) {
10197
auto *AI = dyn_cast<AllocaInst>(&I);
10298
return !AI || AI->isStaticAlloca();
@@ -189,9 +185,11 @@ struct AllocaDerivedValueTracker {
189185
};
190186
}
191187

192-
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
188+
static bool markTails(Function &F, bool &AllCallsAreTailCalls,
189+
OptimizationRemarkEmitter *ORE) {
193190
if (F.callsFunctionThatReturnsTwice())
194191
return false;
192+
AllCallsAreTailCalls = true;
195193

196194
// The local stack holds all alloca instructions and all byval arguments.
197195
AllocaDerivedValueTracker Tracker;
@@ -274,8 +272,11 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
274272
}
275273
}
276274

277-
if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
275+
if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
278276
DeferredTails.push_back(CI);
277+
} else {
278+
AllCallsAreTailCalls = false;
279+
}
279280
}
280281

281282
for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
@@ -312,6 +313,8 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
312313
LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
313314
CI->setTailCall();
314315
Modified = true;
316+
} else {
317+
AllCallsAreTailCalls = false;
315318
}
316319
}
317320

@@ -322,16 +325,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
322325
/// instruction from after the call to before the call, assuming that all
323326
/// instructions between the call and this instruction are movable.
324327
///
325-
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA,
326-
DenseMap<Value *, AllocaInst *> &AllocaForValue) {
327-
if (isa<DbgInfoIntrinsic>(I))
328-
return true;
329-
330-
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
331-
if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
332-
llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue))
333-
return true;
334-
328+
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
335329
// FIXME: We can move load/store/call/free instructions above the call if the
336330
// call does not mod/ref the memory location being processed.
337331
if (I->mayHaveSideEffects()) // This also handles volatile loads.
@@ -398,6 +392,7 @@ class TailRecursionEliminator {
398392
// createTailRecurseLoopHeader the first time we find a call we can eliminate.
399393
BasicBlock *HeaderBB = nullptr;
400394
SmallVector<PHINode *, 8> ArgumentPHIs;
395+
bool RemovableCallsMustBeMarkedTail = false;
401396

402397
// PHI node to store our return value.
403398
PHINode *RetPN = nullptr;
@@ -419,25 +414,25 @@ class TailRecursionEliminator {
419414
// The instruction doing the accumulating.
420415
Instruction *AccumulatorRecursionInstr = nullptr;
421416

422-
// The cache for <value, alloca instruction> pairs.
423-
DenseMap<Value *, AllocaInst *> AllocaForValue;
424-
425417
TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
426418
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
427419
DomTreeUpdater &DTU)
428420
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
429421

430-
CallInst *findTRECandidate(Instruction *TI);
422+
CallInst *findTRECandidate(Instruction *TI,
423+
bool CannotTailCallElimCallsMarkedTail);
431424

432425
void createTailRecurseLoopHeader(CallInst *CI);
433426

434427
void insertAccumulator(Instruction *AccRecInstr);
435428

436429
bool eliminateCall(CallInst *CI);
437430

438-
bool foldReturnAndProcessPred(ReturnInst *Ret);
431+
bool foldReturnAndProcessPred(ReturnInst *Ret,
432+
bool CannotTailCallElimCallsMarkedTail);
439433

440-
bool processReturningBlock(ReturnInst *Ret);
434+
bool processReturningBlock(ReturnInst *Ret,
435+
bool CannotTailCallElimCallsMarkedTail);
441436

442437
void cleanupAndFinalize();
443438

@@ -448,7 +443,8 @@ class TailRecursionEliminator {
448443
};
449444
} // namespace
450445

451-
CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
446+
CallInst *TailRecursionEliminator::findTRECandidate(
447+
Instruction *TI, bool CannotTailCallElimCallsMarkedTail) {
452448
BasicBlock *BB = TI->getParent();
453449

454450
if (&BB->front() == TI) // Make sure there is something before the terminator.
@@ -468,9 +464,9 @@ CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
468464
--BBI;
469465
}
470466

471-
assert((!CI->isTailCall() || !CI->isNoTailCall()) &&
472-
"Incompatible call site attributes(Tail,NoTail)");
473-
if (!CI->isTailCall())
467+
// If this call is marked as a tail call, and if there are dynamic allocas in
468+
// the function, we cannot perform this optimization.
469+
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
474470
return nullptr;
475471

476472
// As a special case, detect code like this:
@@ -502,13 +498,26 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
502498
BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
503499
BI->setDebugLoc(CI->getDebugLoc());
504500

505-
// Move all fixed sized allocas from HeaderBB to NewEntry.
506-
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
507-
NEBI = NewEntry->begin();
508-
OEBI != E;)
509-
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
510-
if (isa<ConstantInt>(AI->getArraySize()))
511-
AI->moveBefore(&*NEBI);
501+
// If this function has self recursive calls in the tail position where some
502+
// are marked tail and some are not, only transform one flavor or another.
503+
// We have to choose whether we move allocas in the entry block to the new
504+
// entry block or not, so we can't make a good choice for both. We make this
505+
// decision here based on whether the first call we found to remove is
506+
// marked tail.
507+
// NOTE: We could do slightly better here in the case that the function has
508+
// no entry block allocas.
509+
RemovableCallsMustBeMarkedTail = CI->isTailCall();
510+
511+
// If this tail call is marked 'tail' and if there are any allocas in the
512+
// entry block, move them up to the new entry block.
513+
if (RemovableCallsMustBeMarkedTail)
514+
// Move all fixed sized allocas from HeaderBB to NewEntry.
515+
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
516+
NEBI = NewEntry->begin();
517+
OEBI != E;)
518+
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
519+
if (isa<ConstantInt>(AI->getArraySize()))
520+
AI->moveBefore(&*NEBI);
512521

513522
// Now that we have created a new block, which jumps to the entry
514523
// block, insert a PHI node for each argument of the function.
@@ -583,7 +592,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
583592
Instruction *AccRecInstr = nullptr;
584593
BasicBlock::iterator BBI(CI);
585594
for (++BBI; &*BBI != Ret; ++BBI) {
586-
if (canMoveAboveCall(&*BBI, CI, AA, AllocaForValue))
595+
if (canMoveAboveCall(&*BBI, CI, AA))
587596
continue;
588597

589598
// If we can't move the instruction above the call, it might be because it
@@ -611,6 +620,9 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
611620
if (!HeaderBB)
612621
createTailRecurseLoopHeader(CI);
613622

623+
if (RemovableCallsMustBeMarkedTail && !CI->isTailCall())
624+
return false;
625+
614626
// Ok, now that we know we have a pseudo-entry block WITH all of the
615627
// required PHI nodes, add entries into the PHI node for the actual
616628
// parameters passed into the tail-recursive call.
@@ -660,7 +672,8 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
660672
return true;
661673
}
662674

663-
bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
675+
bool TailRecursionEliminator::foldReturnAndProcessPred(
676+
ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
664677
BasicBlock *BB = Ret->getParent();
665678

666679
bool Change = false;
@@ -685,7 +698,8 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
685698
while (!UncondBranchPreds.empty()) {
686699
BranchInst *BI = UncondBranchPreds.pop_back_val();
687700
BasicBlock *Pred = BI->getParent();
688-
if (CallInst *CI = findTRECandidate(BI)) {
701+
if (CallInst *CI =
702+
findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) {
689703
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
690704
<< "INTO UNCOND BRANCH PRED: " << *Pred);
691705
FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
@@ -706,8 +720,9 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
706720
return Change;
707721
}
708722

709-
bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) {
710-
CallInst *CI = findTRECandidate(Ret);
723+
bool TailRecursionEliminator::processReturningBlock(
724+
ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
725+
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
711726
if (!CI)
712727
return false;
713728

@@ -795,25 +810,35 @@ bool TailRecursionEliminator::eliminate(Function &F,
795810
return false;
796811

797812
bool MadeChange = false;
798-
MadeChange |= markTails(F, ORE);
813+
bool AllCallsAreTailCalls = false;
814+
MadeChange |= markTails(F, AllCallsAreTailCalls, ORE);
815+
if (!AllCallsAreTailCalls)
816+
return MadeChange;
799817

800818
// If this function is a varargs function, we won't be able to PHI the args
801819
// right, so don't even try to convert it...
802820
if (F.getFunctionType()->isVarArg())
803821
return MadeChange;
804822

805-
if (!canTRE(F))
806-
return MadeChange;
823+
// If false, we cannot perform TRE on tail calls marked with the 'tail'
824+
// attribute, because doing so would cause the stack size to increase (real
825+
// TRE would deallocate variable sized allocas, TRE doesn't).
826+
bool CanTRETailMarkedCall = canTRE(F);
807827

808828
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
809829

810830
// Change any tail recursive calls to loops.
831+
//
832+
// FIXME: The code generator produces really bad code when an 'escaping
833+
// alloca' is changed from being a static alloca to being a dynamic alloca.
834+
// Until this is resolved, disable this transformation if that would ever
835+
// happen. This bug is PR962.
811836
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
812837
BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
813838
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
814-
bool Change = TRE.processReturningBlock(Ret);
839+
bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall);
815840
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
816-
Change = TRE.foldReturnAndProcessPred(Ret);
841+
Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall);
817842
MadeChange |= Change;
818843
}
819844
}

llvm/test/Transforms/TailCallElim/basic.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,15 @@ define void @test0() {
1212
ret void
1313
}
1414

15-
; Make sure that we do not do TRE if pointer to local stack
16-
; escapes through function call.
15+
; PR615. Make sure that we do not move the alloca so that it interferes with the tail call.
1716
define i32 @test1() {
1817
; CHECK: i32 @test1()
1918
; CHECK-NEXT: alloca
2019
%A = alloca i32 ; <i32*> [#uses=2]
2120
store i32 5, i32* %A
2221
call void @use(i32* %A)
23-
; CHECK: call i32 @test1
24-
%X = call i32 @test1() ; <i32> [#uses=1]
22+
; CHECK: tail call i32 @test1
23+
%X = tail call i32 @test1() ; <i32> [#uses=1]
2524
ret i32 %X
2625
}
2726

0 commit comments

Comments
 (0)