//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
+ #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -640,6 +641,26 @@ static bool equalityPropUnSafe(Value &LoopCond) {
  return false;
}

+ namespace {
+ /// Struct to hold information about a partially invariant condition.
+ struct IVConditionInfo {
+   /// Instructions that need to be duplicated and checked for the unswitching
+   /// condition.
+   SmallVector<Instruction *, 4> InstToDuplicate;
+
+   /// Constant to indicate for which value the condition is invariant.
+   Constant *KnownValue = nullptr;
+
+   /// True if the partially invariant path is no-op (=does not have any
+   /// side-effects and no loop value is used outside the loop).
+   bool PathIsNoop = true;
+
+   /// If the partially invariant path reaches a single exit block, ExitForPath
+   /// is set to that block. Otherwise it is nullptr.
+   BasicBlock *ExitForPath = nullptr;
+ };
+ } // namespace
+
/// Check if the loop header has a conditional branch that is not
/// loop-invariant, because it involves load instructions. If all paths from
/// either the true or false successor to the header or loop exits do not
@@ -651,9 +672,8 @@ static bool equalityPropUnSafe(Value &LoopCond) {
/// If the branch condition of the header is partially invariant, return a pair
/// containing the instructions to duplicate and a boolean Constant to update
/// the condition in the loops created for the true or false successors.
- static std::pair<SmallVector<Instruction *, 4>, Constant *>
- hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
-   SmallVector<Instruction *, 4> ToDuplicate;
+ static Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, MemorySSA &MSSA,
+                                                        AAResults *AA) {

  auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
  if (!TI || !TI->isConditional())
@@ -665,7 +685,8 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
  if (!CondI || !L->contains(CondI))
    return {};

-   ToDuplicate.push_back(CondI);
+   SmallVector<Instruction *, 4> InstToDuplicate;
+   InstToDuplicate.push_back(CondI);

  SmallVector<Value *, 4> WorkList;
  WorkList.append(CondI->op_begin(), CondI->op_end());
@@ -686,7 +707,7 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
      if (LI->isVolatile() || LI->isAtomic())
        return {};

-     ToDuplicate.push_back(I);
+     InstToDuplicate.push_back(I);
    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
        // Queue the defining access to check for alias checks.
@@ -701,80 +722,126 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
    WorkList.append(I->op_begin(), I->op_end());
  }

-   if (ToDuplicate.size() <= 1)
+   if (InstToDuplicate.size() <= 1)
    return {};

+   SmallVector<BasicBlock *, 4> ExitingBlocks;
+   L->getExitingBlocks(ExitingBlocks);
  auto HasNoClobbersOnPath =
-       [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
-                              SmallVector<MemoryAccess *, 4> AccessesToCheck) {
-         // First, collect all blocks in the loop that are on a path from Succ
-         // to the header.
-         SmallVector<BasicBlock *, 4> WorkList;
-         WorkList.push_back(Succ);
-         WorkList.push_back(Header);
-         SmallPtrSet<BasicBlock *, 4> Seen;
-         Seen.insert(Header);
-         while (!WorkList.empty()) {
-           BasicBlock *Current = WorkList.pop_back_val();
-           if (!L->contains(Current))
-             continue;
-           const auto &SeenIns = Seen.insert(Current);
-           if (!SeenIns.second)
-             continue;
+       [L, AA, &AccessedLocs, &ExitingBlocks,
+        &InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
+                          SmallVector<MemoryAccess *, 4> AccessesToCheck)
+       -> Optional<IVConditionInfo> {
+     IVConditionInfo Info;
+     // First, collect all blocks in the loop that are on a path from Succ
+     // to the header.
+     SmallVector<BasicBlock *, 4> WorkList;
+     WorkList.push_back(Succ);
+     WorkList.push_back(Header);
+     SmallPtrSet<BasicBlock *, 4> Seen;
+     Seen.insert(Header);
+     Info.PathIsNoop &=
+         all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+     while (!WorkList.empty()) {
+       BasicBlock *Current = WorkList.pop_back_val();
+       if (!L->contains(Current))
+         continue;
+       const auto &SeenIns = Seen.insert(Current);
+       if (!SeenIns.second)
+         continue;

-           WorkList.append(succ_begin(Current), succ_end(Current));
-         }
+       Info.PathIsNoop &= all_of(
+           *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+       WorkList.append(succ_begin(Current), succ_end(Current));
+     }

-         // Require at least 2 blocks on a path through the loop. This skips
-         // paths that directly exit the loop.
-         if (Seen.size() < 2)
-           return false;
+     // Require at least 2 blocks on a path through the loop. This skips
+     // paths that directly exit the loop.
+     if (Seen.size() < 2)
+       return {};

-         // Next, check if there are any MemoryDefs that are on the path through
-         // the loop (in the Seen set) and they may-alias any of the locations in
-         // AccessedLocs. If that is the case, they may modify the condition and
-         // partial unswitching is not possible.
-         SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
-         while (!AccessesToCheck.empty()) {
-           MemoryAccess *Current = AccessesToCheck.pop_back_val();
-           auto SeenI = SeenAccesses.insert(Current);
-           if (!SeenI.second || !Seen.contains(Current->getBlock()))
-             continue;
+     // Next, check if there are any MemoryDefs that are on the path through
+     // the loop (in the Seen set) and they may-alias any of the locations in
+     // AccessedLocs. If that is the case, they may modify the condition and
+     // partial unswitching is not possible.
+     SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+     while (!AccessesToCheck.empty()) {
+       MemoryAccess *Current = AccessesToCheck.pop_back_val();
+       auto SeenI = SeenAccesses.insert(Current);
+       if (!SeenI.second || !Seen.contains(Current->getBlock()))
+         continue;

-           // Bail out if exceeded the threshold.
-           if (SeenAccesses.size() >= MSSAThreshold)
-             return false;
+       // Bail out if exceeded the threshold.
+       if (SeenAccesses.size() >= MSSAThreshold)
+         return {};

-           // MemoryUse are read-only accesses.
-           if (isa<MemoryUse>(Current))
-             continue;
+       // MemoryUse are read-only accesses.
+       if (isa<MemoryUse>(Current))
+         continue;

-           // For a MemoryDef, check if it aliases any of the locations feeding
-           // the original condition.
-           if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
-             if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
-                   return isModSet(
-                       AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
-                 }))
-               return false;
-           }
+       // For a MemoryDef, check if it aliases any of the locations feeding
+       // the original condition.
+       if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+         if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+               return isModSet(
+                   AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+             }))
+           return {};
+       }
+
+       for (Use &U : Current->uses())
+         AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+     }

-           for (Use &U : Current->uses())
-             AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+     // We could also allow loops with known trip counts without mustprogress,
+     // but ScalarEvolution may not be available.
+     Info.PathIsNoop &=
+         L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
+
+     // If the path is considered a no-op so far, check if it reaches a
+     // single exit block without any phis. This ensures no values from the
+     // loop are used outside of the loop.
+     if (Info.PathIsNoop) {
+       for (auto *Exiting : ExitingBlocks) {
+         if (!Seen.contains(Exiting))
+           continue;
+         for (auto *Succ : successors(Exiting)) {
+           if (L->contains(Succ))
+             continue;
+
+           Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+                              (!Info.ExitForPath || Info.ExitForPath == Succ);
+           if (!Info.PathIsNoop)
+             break;
+           assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+                  "cannot have multiple exit blocks");
+           Info.ExitForPath = Succ;
        }
+       }
+     }
+     if (!Info.ExitForPath)
+       Info.PathIsNoop = false;

-         return true;
-       };
+     Info.InstToDuplicate = InstToDuplicate;
+     return Info;
+   };

  // If we branch to the same successor, partial unswitching will not be
  // beneficial.
  if (TI->getSuccessor(0) == TI->getSuccessor(1))
    return {};

-   if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
-     return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
-   if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
-     return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
+   if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
+                                       AccessesToCheck)) {
+     Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+     return Info;
+   }
+   if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
+                                       AccessesToCheck)) {
+     Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+     return Info;
+   }

  return {};
}
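For intuition, here is a hypothetical C++-level example (all names invented; not taken from the patch or its tests) of the kind of loop hasPartialIVCondition is meant to recognize: the header branch is fed by a load, and nothing reachable on one of the two successor paths can modify the memory feeding that load, so the condition is invariant along that path.

// Hypothetical illustration: the branch on *Flag is fed by a load. The false
// path only updates a local accumulator and contains no stores, so it cannot
// clobber *Flag; the condition is partially invariant for the value 'false'
// and the loop can be unswitched on it. The true path calls an external
// function and may well modify *Flag, which this check tolerates.
int process(int); // assumed external

int example(bool *Flag, int *Data, int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I) {
    if (*Flag)
      Data[I] = process(I); // may clobber *Flag
    else
      Sum += I;             // no memory writes on this path
  }
  return Sum;
}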
@@ -986,17 +1053,56 @@ bool LoopUnswitch::processCurrentLoop() {
  // metadata, to avoid unswitching the same loop multiple times.
  if (MSSA &&
      !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
-     auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
-     if (!ToDuplicate.first.empty()) {
+     if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) {
+       assert(!Info->InstToDuplicate.empty() &&
+              "need at least a partially invariant condition");
      LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
-                        << *ToDuplicate.first[0] << "\n");
-       ++NumBranches;
-       unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
-                            CurrentLoop->getHeader()->getTerminator(),
-                            ToDuplicate.first);
+                         << *Info->InstToDuplicate[0] << "\n");
+
+       Instruction *TI = CurrentLoop->getHeader()->getTerminator();
+       Value *LoopCond = Info->InstToDuplicate[0];
+
+       // If the partially unswitched path is a no-op and has a single exit
+       // block, we do not need to do full unswitching. Instead, we can directly
+       // branch to the exit.
+       // TODO: Instead of duplicating the checks, we could also just directly
+       // branch to the exit from the conditional branch in the loop.
+       if (Info->PathIsNoop) {
+         if (HasBranchDivergence &&
+             getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
+           LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
+                             << CurrentLoop->getHeader()->getName()
+                             << " at non-trivial condition '"
+                             << *Info->KnownValue << "' == " << *LoopCond << "\n"
+                             << ". Condition is divergent.\n");
+           return false;
+         }

-       RedoLoop = false;
-       return true;
+         ++NumBranches;
+
+         BasicBlock *TrueDest = LoopHeader;
+         BasicBlock *FalseDest = Info->ExitForPath;
+         if (Info->KnownValue->isOneValue())
+           std::swap(TrueDest, FalseDest);
+
+         auto *OldBr =
+             cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
+         emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
+                                        FalseDest, OldBr, TI,
+                                        Info->InstToDuplicate);
+         delete OldBr;
+         RedoLoop = false;
+         return true;
+       }
+
+       // Otherwise, the path is not a no-op. Run regular unswitching.
+       if (unswitchIfProfitable(LoopCond, Info->KnownValue,
+                                CurrentLoop->getHeader()->getTerminator(),
+                                Info->InstToDuplicate)) {
+         ++NumBranches;
+         RedoLoop = false;
+         return true;
+       }
    }
  }
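To make the new PathIsNoop case concrete, here is a hypothetical example (names invented; not from the patch) together with, roughly, the shape of the rewrite: when the path for the known condition value has no side effects and no loop value escapes, the pass duplicates the condition into the preheader and branches straight to the single exit instead of cloning the loop.

int process(int); // assumed external

// Hypothetical illustration: when *Skip is true, an iteration only re-tests
// the flag and increments I; that path has no side effects and none of its
// values are used after the loop (assuming the *Skip test ends up in the loop
// header after canonicalization).
void example(bool *Skip, int *Data, int N) {
  for (int I = 0; I < N; ++I) {
    if (*Skip)
      continue;
    Data[I] = process(I);
  }
}

// Roughly the effect of the PathIsNoop rewrite, written by hand (not compiler
// output): the duplicated check runs once in the preheader and skips the loop
// entirely when the condition is known.
void exampleRewritten(bool *Skip, int *Data, int N) {
  if (!*Skip)
    for (int I = 0; I < N; ++I) {
      if (*Skip)
        continue;
      Data[I] = process(I);
    }
}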
@@ -1114,12 +1220,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
  Loop *L = LI->getLoopFor(I->getParent());
  auto *DefiningAccess = MemA->getDefiningAccess();
-   // If the defining access is a MemoryPhi in the header, get the incoming
-   // value for the pre-header as defining access.
-   if (DefiningAccess->getBlock() == I->getParent()) {
+   // Get the first defining access before the loop.
+   while (L->contains(DefiningAccess->getBlock())) {
+     // If the defining access is a MemoryPhi, get the incoming
+     // value for the pre-header as defining access.
    if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
      DefiningAccess =
          MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+     } else {
+       DefiningAccess =
+           cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
    }
  }
  MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
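For context on this last hunk: after partial unswitching duplicates the condition's loads into the preheader, the MemorySSA defining access of such a load is not necessarily a MemoryPhi in the loop header; it can be a MemoryDef inside the loop (for example, a store that alias analysis proved does not touch the condition's memory), so the updater now walks the defining-access chain until it leaves the loop. A hypothetical C++-level illustration of such a situation (names invented; the MemorySSA shape described is an assumption, not taken from the patch):

int process(int); // assumed external

// Hypothetical illustration: the increment of *Counter is a store in the loop
// header that cannot alias *Flag (it is restrict-qualified), so the condition
// stays partially invariant, but that store is a MemoryDef inside the loop and
// can sit on the defining-access chain of the *Flag load that gets duplicated
// into the preheader; the updater has to walk past it (and the header
// MemoryPhi) to find a defining access outside the loop.
void example(bool *Flag, int *__restrict Counter, int *Data, int N) {
  for (int I = 0; I < N; ++I) {
    ++*Counter;
    if (*Flag)
      Data[I] = process(I); // this path may modify *Flag
  }
}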