Skip to content

Commit b8c81fa

Browse files
committed
[LoopUnswitch] Add shortcut if unswitched path is a no-op.
If we determine that the invariant path through the loop has no effects, we can directly branch to the exit block, instead to unswitching first. Besides avoiding some extra work (unswitching first, then deleting the loop again) this allows to be more aggressive than regular unswitching with respect to cost-modeling. This approach should always be be desirable. This is similar in spirit to D93734, just that it uses the previously added checks for loop-unswitching. I tried to add the required no-op checks from scratch, as we only check a subset of the loop. There is potential to unify the checks with LoopDeletion, at the cost of adding a predicate whether a block should be considered. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95468
1 parent 80cdd30 commit b8c81fa

File tree

2 files changed

+199
-70
lines changed

2 files changed

+199
-70
lines changed

llvm/lib/Transforms/Scalar/LoopUnswitch.cpp

Lines changed: 175 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,26 @@ static bool equalityPropUnSafe(Value &LoopCond) {
640640
return false;
641641
}
642642

643+
namespace {
644+
/// Struct to hold information about a partially invariant condition.
645+
struct IVConditionInfo {
646+
/// Instructions that need to be duplicated and checked for the unswitching
647+
/// condition.
648+
SmallVector<Instruction *, 4> InstToDuplicate;
649+
650+
/// Constant to indicate for which value the condition is invariant.
651+
Constant *KnownValue = nullptr;
652+
653+
/// True if the partially invariant path is no-op (=does not have any
654+
/// side-effects and no loop value is used outside the loop).
655+
bool PathIsNoop = true;
656+
657+
/// If the partially invariant path reaches a single exit block, ExitForPath
658+
/// is set to that block. Otherwise it is nullptr.
659+
BasicBlock *ExitForPath = nullptr;
660+
};
661+
} // namespace
662+
643663
/// Check if the loop header has a conditional branch that is not
644664
/// loop-invariant, because it involves load instructions. If all paths from
645665
/// either the true or false successor to the header or loop exists do not
@@ -651,9 +671,8 @@ static bool equalityPropUnSafe(Value &LoopCond) {
651671
/// If the branch condition of the header is partially invariant, return a pair
652672
/// containing the instructions to duplicate and a boolean Constant to update
653673
/// the condition in the loops created for the true or false successors.
654-
static std::pair<SmallVector<Instruction *, 4>, Constant *>
655-
hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
656-
SmallVector<Instruction *, 4> ToDuplicate;
674+
static Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, MemorySSA &MSSA,
675+
AAResults *AA) {
657676

658677
auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
659678
if (!TI || !TI->isConditional())
@@ -665,7 +684,8 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
665684
if (!CondI || !L->contains(CondI))
666685
return {};
667686

668-
ToDuplicate.push_back(CondI);
687+
SmallVector<Instruction *, 4> InstToDuplicate;
688+
InstToDuplicate.push_back(CondI);
669689

670690
SmallVector<Value *, 4> WorkList;
671691
WorkList.append(CondI->op_begin(), CondI->op_end());
@@ -686,7 +706,7 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
686706
if (LI->isVolatile() || LI->isAtomic())
687707
return {};
688708

689-
ToDuplicate.push_back(I);
709+
InstToDuplicate.push_back(I);
690710
if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
691711
if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
692712
// Queue the defining access to check for alias checks.
@@ -701,80 +721,126 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
701721
WorkList.append(I->op_begin(), I->op_end());
702722
}
703723

704-
if (ToDuplicate.size() <= 1)
724+
if (InstToDuplicate.size() <= 1)
705725
return {};
706726

727+
SmallVector<BasicBlock *, 4> ExitingBlocks;
728+
L->getExitingBlocks(ExitingBlocks);
707729
auto HasNoClobbersOnPath =
708-
[L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
709-
SmallVector<MemoryAccess *, 4> AccessesToCheck) {
710-
// First, collect all blocks in the loop that are on a patch from Succ
711-
// to the header.
712-
SmallVector<BasicBlock *, 4> WorkList;
713-
WorkList.push_back(Succ);
714-
WorkList.push_back(Header);
715-
SmallPtrSet<BasicBlock *, 4> Seen;
716-
Seen.insert(Header);
717-
while (!WorkList.empty()) {
718-
BasicBlock *Current = WorkList.pop_back_val();
719-
if (!L->contains(Current))
720-
continue;
721-
const auto &SeenIns = Seen.insert(Current);
722-
if (!SeenIns.second)
723-
continue;
730+
[L, AA, &AccessedLocs, &ExitingBlocks,
731+
&InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
732+
SmallVector<MemoryAccess *, 4> AccessesToCheck)
733+
-> Optional<IVConditionInfo> {
734+
IVConditionInfo Info;
735+
// First, collect all blocks in the loop that are on a patch from Succ
736+
// to the header.
737+
SmallVector<BasicBlock *, 4> WorkList;
738+
WorkList.push_back(Succ);
739+
WorkList.push_back(Header);
740+
SmallPtrSet<BasicBlock *, 4> Seen;
741+
Seen.insert(Header);
742+
Info.PathIsNoop &=
743+
all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
744+
745+
while (!WorkList.empty()) {
746+
BasicBlock *Current = WorkList.pop_back_val();
747+
if (!L->contains(Current))
748+
continue;
749+
const auto &SeenIns = Seen.insert(Current);
750+
if (!SeenIns.second)
751+
continue;
724752

725-
WorkList.append(succ_begin(Current), succ_end(Current));
726-
}
753+
Info.PathIsNoop &= all_of(
754+
*Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
755+
WorkList.append(succ_begin(Current), succ_end(Current));
756+
}
727757

728-
// Require at least 2 blocks on a path through the loop. This skips
729-
// paths that directly exit the loop.
730-
if (Seen.size() < 2)
731-
return false;
758+
// Require at least 2 blocks on a path through the loop. This skips
759+
// paths that directly exit the loop.
760+
if (Seen.size() < 2)
761+
return {};
732762

733-
// Next, check if there are any MemoryDefs that are on the path through
734-
// the loop (in the Seen set) and they may-alias any of the locations in
735-
// AccessedLocs. If that is the case, they may modify the condition and
736-
// partial unswitching is not possible.
737-
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
738-
while (!AccessesToCheck.empty()) {
739-
MemoryAccess *Current = AccessesToCheck.pop_back_val();
740-
auto SeenI = SeenAccesses.insert(Current);
741-
if (!SeenI.second || !Seen.contains(Current->getBlock()))
742-
continue;
763+
// Next, check if there are any MemoryDefs that are on the path through
764+
// the loop (in the Seen set) and they may-alias any of the locations in
765+
// AccessedLocs. If that is the case, they may modify the condition and
766+
// partial unswitching is not possible.
767+
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
768+
while (!AccessesToCheck.empty()) {
769+
MemoryAccess *Current = AccessesToCheck.pop_back_val();
770+
auto SeenI = SeenAccesses.insert(Current);
771+
if (!SeenI.second || !Seen.contains(Current->getBlock()))
772+
continue;
743773

744-
// Bail out if exceeded the threshold.
745-
if (SeenAccesses.size() >= MSSAThreshold)
746-
return false;
774+
// Bail out if exceeded the threshold.
775+
if (SeenAccesses.size() >= MSSAThreshold)
776+
return {};
747777

748-
// MemoryUse are read-only accesses.
749-
if (isa<MemoryUse>(Current))
750-
continue;
778+
// MemoryUse are read-only accesses.
779+
if (isa<MemoryUse>(Current))
780+
continue;
751781

752-
// For a MemoryDef, check if is aliases any of the location feeding
753-
// the original condition.
754-
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
755-
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
756-
return isModSet(
757-
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
758-
}))
759-
return false;
760-
}
782+
// For a MemoryDef, check if is aliases any of the location feeding
783+
// the original condition.
784+
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
785+
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
786+
return isModSet(
787+
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
788+
}))
789+
return {};
790+
}
791+
792+
for (Use &U : Current->uses())
793+
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
794+
}
761795

762-
for (Use &U : Current->uses())
763-
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
796+
// We could also allow loops with known trip counts without mustprogress,
797+
// but ScalarEvolution may not be available.
798+
Info.PathIsNoop &=
799+
L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
800+
801+
// If the path is considered a no-op so far, check if it reaches a
802+
// single exit block without any phis. This ensures no values from the
803+
// loop are used outside of the loop.
804+
if (Info.PathIsNoop) {
805+
for (auto *Exiting : ExitingBlocks) {
806+
if (!Seen.contains(Exiting))
807+
continue;
808+
for (auto *Succ : successors(Exiting)) {
809+
if (L->contains(Succ))
810+
continue;
811+
812+
Info.PathIsNoop &= empty(Succ->phis()) &&
813+
(!Info.ExitForPath || Info.ExitForPath == Succ);
814+
if (!Info.PathIsNoop)
815+
break;
816+
assert(!Info.ExitForPath || Info.ExitForPath == Succ &&
817+
"cannot have multiple exit blocks");
818+
Info.ExitForPath = Succ;
764819
}
820+
}
821+
}
822+
if (!Info.ExitForPath)
823+
Info.PathIsNoop = false;
765824

766-
return true;
767-
};
825+
Info.InstToDuplicate = InstToDuplicate;
826+
return Info;
827+
};
768828

769829
// If we branch to the same successor, partial unswitching will not be
770830
// beneficial.
771831
if (TI->getSuccessor(0) == TI->getSuccessor(1))
772832
return {};
773833

774-
if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
775-
return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
776-
if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
777-
return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
834+
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
835+
AccessesToCheck)) {
836+
Info->KnownValue = ConstantInt::getTrue(TI->getContext());
837+
return Info;
838+
}
839+
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
840+
AccessesToCheck)) {
841+
Info->KnownValue = ConstantInt::getFalse(TI->getContext());
842+
return Info;
843+
}
778844

779845
return {};
780846
}
@@ -986,17 +1052,56 @@ bool LoopUnswitch::processCurrentLoop() {
9861052
// metadata, to avoid unswitching the same loop multiple times.
9871053
if (MSSA &&
9881054
!findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
989-
auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
990-
if (!ToDuplicate.first.empty()) {
1055+
if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) {
1056+
assert(!Info->InstToDuplicate.empty() &&
1057+
"need at least a partially invariant condition");
9911058
LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
992-
<< *ToDuplicate.first[0] << "\n");
993-
++NumBranches;
994-
unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
995-
CurrentLoop->getHeader()->getTerminator(),
996-
ToDuplicate.first);
1059+
<< *Info->InstToDuplicate[0] << "\n");
1060+
1061+
Instruction *TI = CurrentLoop->getHeader()->getTerminator();
1062+
Value *LoopCond = Info->InstToDuplicate[0];
1063+
1064+
// If the partially unswitched path is a no-op and has a single exit
1065+
// block, we do not need to do full unswitching. Instead, we can directly
1066+
// branch to the exit.
1067+
// TODO: Instead of duplicating the checks, we could also just directly
1068+
// branch to the exit from the conditional branch in the loop.
1069+
if (Info->PathIsNoop) {
1070+
if (HasBranchDivergence &&
1071+
getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
1072+
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
1073+
<< CurrentLoop->getHeader()->getName()
1074+
<< " at non-trivial condition '"
1075+
<< *Info->KnownValue << "' == " << *LoopCond << "\n"
1076+
<< ". Condition is divergent.\n");
1077+
return false;
1078+
}
9971079

998-
RedoLoop = false;
999-
return true;
1080+
++NumBranches;
1081+
1082+
BasicBlock *TrueDest = LoopHeader;
1083+
BasicBlock *FalseDest = Info->ExitForPath;
1084+
if (Info->KnownValue->isOneValue())
1085+
std::swap(TrueDest, FalseDest);
1086+
1087+
auto *OldBr =
1088+
cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
1089+
emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
1090+
FalseDest, OldBr, TI,
1091+
Info->InstToDuplicate);
1092+
delete OldBr;
1093+
RedoLoop = false;
1094+
return true;
1095+
}
1096+
1097+
// Otherwise, the path is not a no-op. Run regular unswitching.
1098+
if (unswitchIfProfitable(LoopCond, Info->KnownValue,
1099+
CurrentLoop->getHeader()->getTerminator(),
1100+
Info->InstToDuplicate)) {
1101+
++NumBranches;
1102+
RedoLoop = false;
1103+
return true;
1104+
}
10001105
}
10011106
}
10021107

llvm/test/Transforms/LoopUnswitch/partial-unswitch-cost.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,16 @@ exit:
4848
define i32 @partial_unswitch_shortcut_mustprogress(i32* %ptr, i32 %N) mustprogress {
4949
; CHECK-LABEL: @partial_unswitch_shortcut_mustprogress
5050
; CHECK-LABEL: entry:
51+
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
52+
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
53+
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:[a-z._]+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
54+
;
55+
; CHECK: [[CRIT_TO_HEADER]]:
5156
; CHECK-NEXT: br label %loop.header
5257
;
58+
; CHECK: [[CRIT_TO_EXIT]]:
59+
; CHECK-NEXT: br label %exit
60+
;
5361
entry:
5462
br label %loop.header
5563

@@ -86,8 +94,16 @@ exit:
8694
define i32 @partial_unswitch_shortcut_mustprogress_single_exit_on_path(i32* %ptr, i32 %N) mustprogress {
8795
; CHECK-LABEL: @partial_unswitch_shortcut_mustprogress_single_exit_on_path
8896
; CHECK-LABEL: entry:
97+
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
98+
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
99+
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:.+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
100+
;
101+
; CHECK: [[CRIT_TO_HEADER]]:
89102
; CHECK-NEXT: br label %loop.header
90103
;
104+
; CHECK: [[CRIT_TO_EXIT]]:
105+
; CHECK-NEXT: br label %exit
106+
;
91107
entry:
92108
br label %loop.header
93109

@@ -208,8 +224,16 @@ exit:
208224
define i32 @partial_unswitch_shortcut_multiple_exiting_blocks(i32* %ptr, i32 %N, i1 %ec.1) mustprogress {
209225
; CHECK-LABEL: @partial_unswitch_shortcut_multiple_exiting_blocks
210226
; CHECK-LABEL: entry:
227+
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
228+
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
229+
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:.+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
230+
;
231+
; CHECK: [[CRIT_TO_HEADER]]:
211232
; CHECK-NEXT: br label %loop.header
212233
;
234+
; CHECK: [[CRIT_TO_EXIT]]:
235+
; CHECK-NEXT: br label %exit
236+
;
213237
entry:
214238
br label %loop.header
215239

0 commit comments

Comments
 (0)