@@ -2662,19 +2662,20 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2662
2662
/// That requires knowing not just the number of "remaining" candidates but
2663
2663
/// also costs of unswitching for each of these candidates.
2664
2664
static int CalculateUnswitchCostMultiplier (
2665
- Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2666
- ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2665
+ const Instruction &TI, const Loop &L, const LoopInfo &LI,
2666
+ const DominatorTree &DT,
2667
+ ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *> > >
2667
2668
UnswitchCandidates) {
2668
2669
2669
2670
// Guards and other exiting conditions do not contribute to exponential
2670
2671
// explosion as soon as they dominate the latch (otherwise there might be
2671
2672
// another path to the latch remaining that does not allow eliminating the
2672
2673
// loop copy on unswitch).
2673
- BasicBlock *Latch = L.getLoopLatch ();
2674
- BasicBlock *CondBlock = TI.getParent ();
2674
+ const BasicBlock *Latch = L.getLoopLatch ();
2675
+ const BasicBlock *CondBlock = TI.getParent ();
2675
2676
if (DT.dominates (CondBlock, Latch) &&
2676
2677
(isGuard (&TI) ||
2677
- llvm::count_if (successors (&TI), [&L](BasicBlock *SuccBB) {
2678
+ llvm::count_if (successors (&TI), [&L](const BasicBlock *SuccBB) {
2678
2679
return L.contains (SuccBB);
2679
2680
}) <= 1 )) {
2680
2681
NumCostMultiplierSkipped++;
@@ -2688,16 +2689,17 @@ static int CalculateUnswitchCostMultiplier(
2688
2689
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2689
2690
int UnswitchedClones = 0 ;
2690
2691
for (auto Candidate : UnswitchCandidates) {
2691
- Instruction *CI = Candidate.first ;
2692
- BasicBlock *CondBlock = CI->getParent ();
2692
+ const Instruction *CI = Candidate.first ;
2693
+ const BasicBlock *CondBlock = CI->getParent ();
2693
2694
bool SkipExitingSuccessors = DT.dominates (CondBlock, Latch);
2694
2695
if (isGuard (CI)) {
2695
2696
if (!SkipExitingSuccessors)
2696
2697
UnswitchedClones++;
2697
2698
continue ;
2698
2699
}
2699
- int NonExitingSuccessors = llvm::count_if (
2700
- successors (CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2700
+ int NonExitingSuccessors =
2701
+ llvm::count_if (successors (CondBlock),
2702
+ [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2701
2703
return !SkipExitingSuccessors || L.contains (SuccBB);
2702
2704
});
2703
2705
UnswitchedClones += Log2_32 (NonExitingSuccessors);
@@ -2817,54 +2819,20 @@ static bool collectUnswitchCandidates(
2817
2819
return !UnswitchCandidates.empty ();
2818
2820
}
2819
2821
2820
- static bool unswitchBestCondition (
2821
- Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2822
- AAResults &AA, TargetTransformInfo &TTI,
2823
- function_ref<void (bool , bool , ArrayRef<Loop *>)> UnswitchCB,
2824
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2825
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2826
- // Collect all invariant conditions within this loop (as opposed to an inner
2827
- // loop which would be handled when visiting that inner loop).
2828
- SmallVector<std::pair<Instruction *, TinyPtrVector<Value *> >, 4 >
2829
- UnswitchCandidates;
2830
- IVConditionInfo PartialIVInfo;
2831
- Instruction *PartialIVCondBranch = nullptr ;
2832
- // If we didn't find any candidates, we're done.
2833
- if (!collectUnswitchCandidates (UnswitchCandidates, PartialIVInfo,
2834
- PartialIVCondBranch, L, LI, AA, MSSAU))
2835
- return false ;
2836
-
2837
- // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2838
- // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2839
- // irreducible control flow into reducible control flow and introduce new
2840
- // loops "out of thin air". If we ever discover important use cases for doing
2841
- // this, we can add support to loop unswitch, but it is a lot of complexity
2842
- // for what seems little or no real world benefit.
2843
- LoopBlocksRPO RPOT (&L);
2844
- RPOT.perform (&LI);
2845
- if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2846
- return false ;
2847
-
2848
- SmallVector<BasicBlock *, 4 > ExitBlocks;
2849
- L.getUniqueExitBlocks (ExitBlocks);
2850
-
2851
- // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2852
- // instruction as we don't know how to split those exit blocks.
2853
- // FIXME: We should teach SplitBlock to handle this and remove this
2854
- // restriction.
2855
- for (auto *ExitBB : ExitBlocks) {
2856
- auto *I = ExitBB->getFirstNonPHI ();
2857
- if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2858
- LLVM_DEBUG (dbgs () << " Cannot unswitch because of cleanuppad/catchswitch "
2859
- " in exit block\n " );
2860
- return false ;
2861
- }
2862
- }
2863
-
2864
- LLVM_DEBUG (
2865
- dbgs () << " Considering " << UnswitchCandidates.size ()
2866
- << " non-trivial loop invariant conditions for unswitching.\n " );
2867
-
2822
+ namespace {
2823
+ struct NonTrivialUnswitchCandidate {
2824
+ Instruction *TI = nullptr ;
2825
+ InstructionCost Cost = 0 ;
2826
+ ArrayRef<Value *> Invariants;
2827
+ };
2828
+ } // end anonymous namespace.
2829
+
2830
+ static Optional<NonTrivialUnswitchCandidate>
2831
+ findBestNonTrivialUnswitchCandidate (
2832
+ ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *> > >
2833
+ UnswitchCandidates, const Loop &L, const DominatorTree &DT,
2834
+ const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI,
2835
+ const IVConditionInfo &PartialIVInfo) {
2868
2836
// Given that unswitching these terminators will require duplicating parts of
2869
2837
// the loop, so we need to be able to model that cost. Compute the ephemeral
2870
2838
// values and set up a data structure to hold per-BB costs. We cache each
@@ -2891,10 +2859,10 @@ static bool unswitchBestCondition(
2891
2859
continue ;
2892
2860
2893
2861
if (I.getType ()->isTokenTy () && I.isUsedOutsideOfBlock (BB))
2894
- return false ;
2862
+ return None ;
2895
2863
if (auto *CB = dyn_cast<CallBase>(&I))
2896
2864
if (CB->isConvergent () || CB->cannotDuplicate ())
2897
- return false ;
2865
+ return None ;
2898
2866
2899
2867
Cost += TTI.getInstructionCost (&I, CostKind);
2900
2868
}
@@ -2978,9 +2946,8 @@ static bool unswitchBestCondition(
2978
2946
" Cannot unswitch a condition without multiple distinct successors!" );
2979
2947
return (LoopCost - Cost) * (SuccessorsCount - 1 );
2980
2948
};
2981
- Instruction *BestUnswitchTI = nullptr ;
2982
- InstructionCost BestUnswitchCost = 0 ;
2983
- ArrayRef<Value *> BestUnswitchInvariants;
2949
+
2950
+ NonTrivialUnswitchCandidate Best;
2984
2951
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2985
2952
Instruction &TI = *TerminatorAndInvariants.first ;
2986
2953
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second ;
@@ -3006,34 +2973,90 @@ static bool unswitchBestCondition(
3006
2973
<< " for unswitch candidate: " << TI << " \n " );
3007
2974
}
3008
2975
3009
- if (!BestUnswitchTI || CandidateCost < BestUnswitchCost ) {
3010
- BestUnswitchTI = &TI;
3011
- BestUnswitchCost = CandidateCost;
3012
- BestUnswitchInvariants = Invariants;
2976
+ if (!Best. TI || CandidateCost < Best. Cost ) {
2977
+ Best. TI = &TI;
2978
+ Best. Cost = CandidateCost;
2979
+ Best. Invariants = Invariants;
3013
2980
}
3014
2981
}
3015
- assert (BestUnswitchTI && " Failed to find loop unswitch candidate" );
2982
+ return Best;
2983
+ }
3016
2984
3017
- if (BestUnswitchCost >= UnswitchThreshold) {
3018
- LLVM_DEBUG (dbgs () << " Cannot unswitch, lowest cost found: "
3019
- << BestUnswitchCost << " \n " );
2985
+ static bool unswitchBestCondition (
2986
+ Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2987
+ AAResults &AA, TargetTransformInfo &TTI,
2988
+ function_ref<void (bool , bool , ArrayRef<Loop *>)> UnswitchCB,
2989
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2990
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2991
+ // Collect all invariant conditions within this loop (as opposed to an inner
2992
+ // loop which would be handled when visiting that inner loop).
2993
+ SmallVector<std::pair<Instruction *, TinyPtrVector<Value *> >, 4 >
2994
+ UnswitchCandidates;
2995
+ IVConditionInfo PartialIVInfo;
2996
+ Instruction *PartialIVCondBranch = nullptr ;
2997
+ // If we didn't find any candidates, we're done.
2998
+ if (!collectUnswitchCandidates (UnswitchCandidates, PartialIVInfo,
2999
+ PartialIVCondBranch, L, LI, AA, MSSAU))
3000
+ return false ;
3001
+
3002
+ // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3003
+ // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3004
+ // irreducible control flow into reducible control flow and introduce new
3005
+ // loops "out of thin air". If we ever discover important use cases for doing
3006
+ // this, we can add support to loop unswitch, but it is a lot of complexity
3007
+ // for what seems little or no real world benefit.
3008
+ LoopBlocksRPO RPOT (&L);
3009
+ RPOT.perform (&LI);
3010
+ if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3011
+ return false ;
3012
+
3013
+ SmallVector<BasicBlock *, 4 > ExitBlocks;
3014
+ L.getUniqueExitBlocks (ExitBlocks);
3015
+
3016
+ // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3017
+ // instruction as we don't know how to split those exit blocks.
3018
+ // FIXME: We should teach SplitBlock to handle this and remove this
3019
+ // restriction.
3020
+ for (auto *ExitBB : ExitBlocks) {
3021
+ auto *I = ExitBB->getFirstNonPHI ();
3022
+ if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3023
+ LLVM_DEBUG (dbgs () << " Cannot unswitch because of cleanuppad/catchswitch "
3024
+ " in exit block\n " );
3025
+ return false ;
3026
+ }
3027
+ }
3028
+
3029
+ LLVM_DEBUG (
3030
+ dbgs () << " Considering " << UnswitchCandidates.size ()
3031
+ << " non-trivial loop invariant conditions for unswitching.\n " );
3032
+
3033
+ Optional<NonTrivialUnswitchCandidate> Best =
3034
+ findBestNonTrivialUnswitchCandidate (UnswitchCandidates, L, DT, LI, AC,
3035
+ TTI, PartialIVInfo);
3036
+ if (!Best)
3037
+ return false ;
3038
+
3039
+ assert (Best->TI && " Failed to find loop unswitch candidate" );
3040
+
3041
+ if (Best->Cost >= UnswitchThreshold) {
3042
+ LLVM_DEBUG (dbgs () << " Cannot unswitch, lowest cost found: " << Best->Cost
3043
+ << " \n " );
3020
3044
return false ;
3021
3045
}
3022
3046
3023
- if (BestUnswitchTI != PartialIVCondBranch)
3047
+ if (Best-> TI != PartialIVCondBranch)
3024
3048
PartialIVInfo.InstToDuplicate .clear ();
3025
3049
3026
3050
// If the best candidate is a guard, turn it into a branch.
3027
- if (isGuard (BestUnswitchTI))
3028
- BestUnswitchTI = turnGuardIntoBranch (cast<IntrinsicInst>(BestUnswitchTI), L,
3029
- ExitBlocks, DT, LI, MSSAU);
3030
-
3031
- LLVM_DEBUG (dbgs () << " Unswitching non-trivial (cost = "
3032
- << BestUnswitchCost << " ) terminator: " << *BestUnswitchTI
3033
- << " \n " );
3034
- unswitchNontrivialInvariants (L, *BestUnswitchTI, BestUnswitchInvariants,
3035
- ExitBlocks, PartialIVInfo, DT, LI, AC,
3036
- UnswitchCB, SE, MSSAU, DestroyLoopCB);
3051
+ if (isGuard (Best->TI ))
3052
+ Best->TI = turnGuardIntoBranch (cast<IntrinsicInst>(Best->TI ), L, ExitBlocks,
3053
+ DT, LI, MSSAU);
3054
+
3055
+ LLVM_DEBUG (dbgs () << " Unswitching non-trivial (cost = " << Best->Cost
3056
+ << " ) terminator: " << *Best->TI << " \n " );
3057
+ unswitchNontrivialInvariants (L, *Best->TI , Best->Invariants , ExitBlocks,
3058
+ PartialIVInfo, DT, LI, AC, UnswitchCB, SE, MSSAU,
3059
+ DestroyLoopCB);
3037
3060
return true ;
3038
3061
}
3039
3062
0 commit comments