@@ -640,6 +640,26 @@ static bool equalityPropUnSafe(Value &LoopCond) {
640
640
return false ;
641
641
}
642
642
643
+ namespace {
644
+ // / Struct to hold information about a partially invariant condition. It is the result type of hasPartialIVCondition.
645
+ struct IVConditionInfo {
646
+ // / Instructions that need to be duplicated and checked for the unswitching
647
+ // / condition. The first entry is the one used as the unswitching condition.
648
+ SmallVector<Instruction *, 4 > InstToDuplicate;
649
+
650
+ // / Constant to indicate for which value the condition is invariant (true if the qualifying path starts at the header branch's first successor, false if it starts at the second).
651
+ Constant *KnownValue = nullptr ;
652
+
653
+ // / True if the partially invariant path is no-op (=does not have any
654
+ // / side-effects and no loop value is used outside the loop). Also requires the loop or its function to be mustprogress and a single phi-free exit block.
655
+ bool PathIsNoop = true ;
656
+
657
+ // / If the partially invariant path reaches a single exit block, ExitForPath
658
+ // / is set to that block. Otherwise it is nullptr. Only computed while the path is still considered a no-op; used as the preheader branch target in that case.
659
+ BasicBlock *ExitForPath = nullptr ;
660
+ };
661
+ } // namespace
662
+
643
663
// / Check if the loop header has a conditional branch that is not
644
664
// / loop-invariant, because it involves load instructions. If all paths from
645
665
// / either the true or false successor to the header or loop exits do not
@@ -651,9 +671,8 @@ static bool equalityPropUnSafe(Value &LoopCond) {
651
671
// / If the branch condition of the header is partially invariant, return an
652
672
// / IVConditionInfo with the instructions to duplicate and a boolean Constant to update
653
673
// / the condition in the loops created for the true or false successors.
654
- static std::pair<SmallVector<Instruction *, 4 >, Constant *>
655
- hasPartialIVCondition (Loop *L, MemorySSA &MSSA, AAResults *AA) {
656
- SmallVector<Instruction *, 4 > ToDuplicate;
674
+ static Optional<IVConditionInfo> hasPartialIVCondition (Loop *L, MemorySSA &MSSA,
675
+ AAResults *AA) {
657
676
658
677
auto *TI = dyn_cast<BranchInst>(L->getHeader ()->getTerminator ());
659
678
if (!TI || !TI->isConditional ())
@@ -665,7 +684,8 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
665
684
if (!CondI || !L->contains (CondI))
666
685
return {};
667
686
668
- ToDuplicate.push_back (CondI);
687
+ SmallVector<Instruction *, 4 > InstToDuplicate;
688
+ InstToDuplicate.push_back (CondI);
669
689
670
690
SmallVector<Value *, 4 > WorkList;
671
691
WorkList.append (CondI->op_begin (), CondI->op_end ());
@@ -686,7 +706,7 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
686
706
if (LI->isVolatile () || LI->isAtomic ())
687
707
return {};
688
708
689
- ToDuplicate .push_back (I);
709
+ InstToDuplicate .push_back (I);
690
710
if (MemoryAccess *MA = MSSA.getMemoryAccess (I)) {
691
711
if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
692
712
// Queue the defining access to check for alias checks.
@@ -701,80 +721,126 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
701
721
WorkList.append (I->op_begin (), I->op_end ());
702
722
}
703
723
704
- if (ToDuplicate .size () <= 1 )
724
+ if (InstToDuplicate .size () <= 1 )
705
725
return {};
706
726
727
+ SmallVector<BasicBlock *, 4 > ExitingBlocks;
728
+ L->getExitingBlocks (ExitingBlocks);
707
729
auto HasNoClobbersOnPath =
708
- [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
709
- SmallVector<MemoryAccess *, 4 > AccessesToCheck) {
710
- // First, collect all blocks in the loop that are on a patch from Succ
711
- // to the header.
712
- SmallVector<BasicBlock *, 4 > WorkList;
713
- WorkList.push_back (Succ);
714
- WorkList.push_back (Header);
715
- SmallPtrSet<BasicBlock *, 4 > Seen;
716
- Seen.insert (Header);
717
- while (!WorkList.empty ()) {
718
- BasicBlock *Current = WorkList.pop_back_val ();
719
- if (!L->contains (Current))
720
- continue ;
721
- const auto &SeenIns = Seen.insert (Current);
722
- if (!SeenIns.second )
723
- continue ;
730
+ [L, AA, &AccessedLocs, &ExitingBlocks,
731
+ &InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
732
+ SmallVector<MemoryAccess *, 4 > AccessesToCheck)
733
+ -> Optional<IVConditionInfo> {
734
+ IVConditionInfo Info;
735
+ // First, collect all blocks in the loop that are on a path from Succ
736
+ // to the header.
737
+ SmallVector<BasicBlock *, 4 > WorkList;
738
+ WorkList.push_back (Succ);
739
+ WorkList.push_back (Header);
740
+ SmallPtrSet<BasicBlock *, 4 > Seen;
741
+ Seen.insert (Header);
742
+ Info.PathIsNoop &=
743
+ all_of (*Header, [](Instruction &I) { return !I.mayHaveSideEffects (); });
744
+
745
+ while (!WorkList.empty ()) {
746
+ BasicBlock *Current = WorkList.pop_back_val ();
747
+ if (!L->contains (Current))
748
+ continue ;
749
+ const auto &SeenIns = Seen.insert (Current);
750
+ if (!SeenIns.second )
751
+ continue ;
724
752
725
- WorkList.append (succ_begin (Current), succ_end (Current));
726
- }
753
+ Info.PathIsNoop &= all_of (
754
+ *Current, [](Instruction &I) { return !I.mayHaveSideEffects (); });
755
+ WorkList.append (succ_begin (Current), succ_end (Current));
756
+ }
727
757
728
- // Require at least 2 blocks on a path through the loop. This skips
729
- // paths that directly exit the loop.
730
- if (Seen.size () < 2 )
731
- return false ;
758
+ // Require at least 2 blocks on a path through the loop. This skips
759
+ // paths that directly exit the loop.
760
+ if (Seen.size () < 2 )
761
+ return {} ;
732
762
733
- // Next, check if there are any MemoryDefs that are on the path through
734
- // the loop (in the Seen set) and they may-alias any of the locations in
735
- // AccessedLocs. If that is the case, they may modify the condition and
736
- // partial unswitching is not possible.
737
- SmallPtrSet<MemoryAccess *, 4 > SeenAccesses;
738
- while (!AccessesToCheck.empty ()) {
739
- MemoryAccess *Current = AccessesToCheck.pop_back_val ();
740
- auto SeenI = SeenAccesses.insert (Current);
741
- if (!SeenI.second || !Seen.contains (Current->getBlock ()))
742
- continue ;
763
+ // Next, check if there are any MemoryDefs that are on the path through
764
+ // the loop (in the Seen set) and they may-alias any of the locations in
765
+ // AccessedLocs. If that is the case, they may modify the condition and
766
+ // partial unswitching is not possible.
767
+ SmallPtrSet<MemoryAccess *, 4 > SeenAccesses;
768
+ while (!AccessesToCheck.empty ()) {
769
+ MemoryAccess *Current = AccessesToCheck.pop_back_val ();
770
+ auto SeenI = SeenAccesses.insert (Current);
771
+ if (!SeenI.second || !Seen.contains (Current->getBlock ()))
772
+ continue ;
743
773
744
- // Bail out if exceeded the threshold.
745
- if (SeenAccesses.size () >= MSSAThreshold)
746
- return false ;
774
+ // Bail out if exceeded the threshold.
775
+ if (SeenAccesses.size () >= MSSAThreshold)
776
+ return {} ;
747
777
748
- // MemoryUse are read-only accesses.
749
- if (isa<MemoryUse>(Current))
750
- continue ;
778
+ // MemoryUse are read-only accesses.
779
+ if (isa<MemoryUse>(Current))
780
+ continue ;
751
781
752
- // For a MemoryDef, check if is aliases any of the location feeding
753
- // the original condition.
754
- if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
755
- if (any_of (AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
756
- return isModSet (
757
- AA->getModRefInfo (CurrentDef->getMemoryInst (), Loc));
758
- }))
759
- return false ;
760
- }
782
+ // For a MemoryDef, check if it aliases any of the locations feeding
783
+ // the original condition.
784
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
785
+ if (any_of (AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
786
+ return isModSet (
787
+ AA->getModRefInfo (CurrentDef->getMemoryInst (), Loc));
788
+ }))
789
+ return {};
790
+ }
791
+
792
+ for (Use &U : Current->uses ())
793
+ AccessesToCheck.push_back (cast<MemoryAccess>(U.getUser ()));
794
+ }
761
795
762
- for (Use &U : Current->uses ())
763
- AccessesToCheck.push_back (cast<MemoryAccess>(U.getUser ()));
796
+ // We could also allow loops with known trip counts without mustprogress,
797
+ // but ScalarEvolution may not be available.
798
+ Info.PathIsNoop &=
799
+ L->getHeader ()->getParent ()->mustProgress () || hasMustProgress (L);
800
+
801
+ // If the path is considered a no-op so far, check if it reaches a
802
+ // single exit block without any phis. This ensures no values from the
803
+ // loop are used outside of the loop.
804
+ if (Info.PathIsNoop ) {
805
+ for (auto *Exiting : ExitingBlocks) {
806
+ if (!Seen.contains (Exiting))
807
+ continue ;
808
+ for (auto *Succ : successors (Exiting)) {
809
+ if (L->contains (Succ))
810
+ continue ;
811
+
812
+ Info.PathIsNoop &= empty (Succ->phis ()) &&
813
+ (!Info.ExitForPath || Info.ExitForPath == Succ);
814
+ if (!Info.PathIsNoop )
815
+ break ;
816
+ assert (!Info.ExitForPath || Info.ExitForPath == Succ &&
817
+ " cannot have multiple exit blocks" );
818
+ Info.ExitForPath = Succ;
764
819
}
820
+ }
821
+ }
822
+ if (!Info.ExitForPath )
823
+ Info.PathIsNoop = false ;
765
824
766
- return true ;
767
- };
825
+ Info.InstToDuplicate = InstToDuplicate;
826
+ return Info;
827
+ };
768
828
769
829
// If we branch to the same successor, partial unswitching will not be
770
830
// beneficial.
771
831
if (TI->getSuccessor (0 ) == TI->getSuccessor (1 ))
772
832
return {};
773
833
774
- if (HasNoClobbersOnPath (TI->getSuccessor (0 ), L->getHeader (), AccessesToCheck))
775
- return {ToDuplicate, ConstantInt::getTrue (TI->getContext ())};
776
- if (HasNoClobbersOnPath (TI->getSuccessor (1 ), L->getHeader (), AccessesToCheck))
777
- return {ToDuplicate, ConstantInt::getFalse (TI->getContext ())};
834
+ if (auto Info = HasNoClobbersOnPath (TI->getSuccessor (0 ), L->getHeader (),
835
+ AccessesToCheck)) {
836
+ Info->KnownValue = ConstantInt::getTrue (TI->getContext ());
837
+ return Info;
838
+ }
839
+ if (auto Info = HasNoClobbersOnPath (TI->getSuccessor (1 ), L->getHeader (),
840
+ AccessesToCheck)) {
841
+ Info->KnownValue = ConstantInt::getFalse (TI->getContext ());
842
+ return Info;
843
+ }
778
844
779
845
return {};
780
846
}
@@ -986,17 +1052,56 @@ bool LoopUnswitch::processCurrentLoop() {
986
1052
// metadata, to avoid unswitching the same loop multiple times.
987
1053
if (MSSA &&
988
1054
!findOptionMDForLoop (CurrentLoop, " llvm.loop.unswitch.partial.disable" )) {
989
- auto ToDuplicate = hasPartialIVCondition (CurrentLoop, *MSSA, AA);
990
- if (!ToDuplicate.first .empty ()) {
1055
+ if (auto Info = hasPartialIVCondition (CurrentLoop, *MSSA, AA)) {
1056
+ assert (!Info->InstToDuplicate .empty () &&
1057
+ " need at least a partially invariant condition" );
991
1058
LLVM_DEBUG (dbgs () << " loop-unswitch: Found partially invariant condition "
992
- << *ToDuplicate.first [0 ] << " \n " );
993
- ++NumBranches;
994
- unswitchIfProfitable (ToDuplicate.first [0 ], ToDuplicate.second ,
995
- CurrentLoop->getHeader ()->getTerminator (),
996
- ToDuplicate.first );
1059
+ << *Info->InstToDuplicate [0 ] << " \n " );
1060
+
1061
+ Instruction *TI = CurrentLoop->getHeader ()->getTerminator ();
1062
+ Value *LoopCond = Info->InstToDuplicate [0 ];
1063
+
1064
+ // If the partially unswitched path is a no-op and has a single exit
1065
+ // block, we do not need to do full unswitching. Instead, we can directly
1066
+ // branch to the exit.
1067
+ // TODO: Instead of duplicating the checks, we could also just directly
1068
+ // branch to the exit from the conditional branch in the loop.
1069
+ if (Info->PathIsNoop ) {
1070
+ if (HasBranchDivergence &&
1071
+ getAnalysis<LegacyDivergenceAnalysis>().isDivergent (LoopCond)) {
1072
+ LLVM_DEBUG (dbgs () << " NOT unswitching loop %"
1073
+ << CurrentLoop->getHeader ()->getName ()
1074
+ << " at non-trivial condition '"
1075
+ << *Info->KnownValue << " ' == " << *LoopCond << " \n "
1076
+ << " . Condition is divergent.\n " );
1077
+ return false ;
1078
+ }
997
1079
998
- RedoLoop = false ;
999
- return true ;
1080
+ ++NumBranches;
1081
+
1082
+ BasicBlock *TrueDest = LoopHeader;
1083
+ BasicBlock *FalseDest = Info->ExitForPath ;
1084
+ if (Info->KnownValue ->isOneValue ())
1085
+ std::swap (TrueDest, FalseDest);
1086
+
1087
+ auto *OldBr =
1088
+ cast<BranchInst>(CurrentLoop->getLoopPreheader ()->getTerminator ());
1089
+ emitPreheaderBranchOnCondition (LoopCond, Info->KnownValue , TrueDest,
1090
+ FalseDest, OldBr, TI,
1091
+ Info->InstToDuplicate );
1092
+ delete OldBr;
1093
+ RedoLoop = false ;
1094
+ return true ;
1095
+ }
1096
+
1097
+ // Otherwise, the path is not a no-op. Run regular unswitching.
1098
+ if (unswitchIfProfitable (LoopCond, Info->KnownValue ,
1099
+ CurrentLoop->getHeader ()->getTerminator (),
1100
+ Info->InstToDuplicate )) {
1101
+ ++NumBranches;
1102
+ RedoLoop = false ;
1103
+ return true ;
1104
+ }
1000
1105
}
1001
1106
}
1002
1107
0 commit comments