Skip to content

Commit 95a9b9a

Browse files
author
git apple-llvm automerger
committed
Merge commit '08fbe73e065b' from apple/stable/20210107 into swift/rebranch
2 parents 7c64f9a + 08fbe73 commit 95a9b9a

File tree

4 files changed

+685
-109
lines changed

4 files changed

+685
-109
lines changed

llvm/lib/Transforms/Scalar/LoopUnswitch.cpp

Lines changed: 183 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
//===----------------------------------------------------------------------===//
2727

2828
#include "llvm/ADT/DenseMap.h"
29+
#include "llvm/ADT/STLExtras.h"
2930
#include "llvm/ADT/SmallPtrSet.h"
3031
#include "llvm/ADT/SmallVector.h"
3132
#include "llvm/ADT/Statistic.h"
@@ -640,6 +641,26 @@ static bool equalityPropUnSafe(Value &LoopCond) {
640641
return false;
641642
}
642643

644+
namespace {
645+
/// Struct to hold information about a partially invariant condition.
646+
struct IVConditionInfo {
647+
/// Instructions that need to be duplicated and checked for the unswitching
648+
/// condition.
649+
SmallVector<Instruction *, 4> InstToDuplicate;
650+
651+
/// Constant to indicate for which value the condition is invariant.
652+
Constant *KnownValue = nullptr;
653+
654+
/// True if the partially invariant path is no-op (=does not have any
655+
/// side-effects and no loop value is used outside the loop).
656+
bool PathIsNoop = true;
657+
658+
/// If the partially invariant path reaches a single exit block, ExitForPath
659+
/// is set to that block. Otherwise it is nullptr.
660+
BasicBlock *ExitForPath = nullptr;
661+
};
662+
} // namespace
663+
643664
/// Check if the loop header has a conditional branch that is not
644665
/// loop-invariant, because it involves load instructions. If all paths from
645666
/// either the true or false successor to the header or loop exits do not
@@ -651,9 +672,8 @@ static bool equalityPropUnSafe(Value &LoopCond) {
651672
/// If the branch condition of the header is partially invariant, return a struct
652673
/// containing the instructions to duplicate and a boolean Constant to update
653674
/// the condition in the loops created for the true or false successors.
654-
static std::pair<SmallVector<Instruction *, 4>, Constant *>
655-
hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
656-
SmallVector<Instruction *, 4> ToDuplicate;
675+
static Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, MemorySSA &MSSA,
676+
AAResults *AA) {
657677

658678
auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
659679
if (!TI || !TI->isConditional())
@@ -665,7 +685,8 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
665685
if (!CondI || !L->contains(CondI))
666686
return {};
667687

668-
ToDuplicate.push_back(CondI);
688+
SmallVector<Instruction *, 4> InstToDuplicate;
689+
InstToDuplicate.push_back(CondI);
669690

670691
SmallVector<Value *, 4> WorkList;
671692
WorkList.append(CondI->op_begin(), CondI->op_end());
@@ -686,7 +707,7 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
686707
if (LI->isVolatile() || LI->isAtomic())
687708
return {};
688709

689-
ToDuplicate.push_back(I);
710+
InstToDuplicate.push_back(I);
690711
if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
691712
if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
692713
// Queue the defining access to check for alias checks.
@@ -701,80 +722,126 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
701722
WorkList.append(I->op_begin(), I->op_end());
702723
}
703724

704-
if (ToDuplicate.size() <= 1)
725+
if (InstToDuplicate.size() <= 1)
705726
return {};
706727

728+
SmallVector<BasicBlock *, 4> ExitingBlocks;
729+
L->getExitingBlocks(ExitingBlocks);
707730
auto HasNoClobbersOnPath =
708-
[L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
709-
SmallVector<MemoryAccess *, 4> AccessesToCheck) {
710-
// First, collect all blocks in the loop that are on a patch from Succ
711-
// to the header.
712-
SmallVector<BasicBlock *, 4> WorkList;
713-
WorkList.push_back(Succ);
714-
WorkList.push_back(Header);
715-
SmallPtrSet<BasicBlock *, 4> Seen;
716-
Seen.insert(Header);
717-
while (!WorkList.empty()) {
718-
BasicBlock *Current = WorkList.pop_back_val();
719-
if (!L->contains(Current))
720-
continue;
721-
const auto &SeenIns = Seen.insert(Current);
722-
if (!SeenIns.second)
723-
continue;
731+
[L, AA, &AccessedLocs, &ExitingBlocks,
732+
&InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
733+
SmallVector<MemoryAccess *, 4> AccessesToCheck)
734+
-> Optional<IVConditionInfo> {
735+
IVConditionInfo Info;
736+
// First, collect all blocks in the loop that are on a path from Succ
737+
// to the header.
738+
SmallVector<BasicBlock *, 4> WorkList;
739+
WorkList.push_back(Succ);
740+
WorkList.push_back(Header);
741+
SmallPtrSet<BasicBlock *, 4> Seen;
742+
Seen.insert(Header);
743+
Info.PathIsNoop &=
744+
all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
745+
746+
while (!WorkList.empty()) {
747+
BasicBlock *Current = WorkList.pop_back_val();
748+
if (!L->contains(Current))
749+
continue;
750+
const auto &SeenIns = Seen.insert(Current);
751+
if (!SeenIns.second)
752+
continue;
724753

725-
WorkList.append(succ_begin(Current), succ_end(Current));
726-
}
754+
Info.PathIsNoop &= all_of(
755+
*Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
756+
WorkList.append(succ_begin(Current), succ_end(Current));
757+
}
727758

728-
// Require at least 2 blocks on a path through the loop. This skips
729-
// paths that directly exit the loop.
730-
if (Seen.size() < 2)
731-
return false;
759+
// Require at least 2 blocks on a path through the loop. This skips
760+
// paths that directly exit the loop.
761+
if (Seen.size() < 2)
762+
return {};
732763

733-
// Next, check if there are any MemoryDefs that are on the path through
734-
// the loop (in the Seen set) and they may-alias any of the locations in
735-
// AccessedLocs. If that is the case, they may modify the condition and
736-
// partial unswitching is not possible.
737-
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
738-
while (!AccessesToCheck.empty()) {
739-
MemoryAccess *Current = AccessesToCheck.pop_back_val();
740-
auto SeenI = SeenAccesses.insert(Current);
741-
if (!SeenI.second || !Seen.contains(Current->getBlock()))
742-
continue;
764+
// Next, check if there are any MemoryDefs that are on the path through
765+
// the loop (in the Seen set) and they may-alias any of the locations in
766+
// AccessedLocs. If that is the case, they may modify the condition and
767+
// partial unswitching is not possible.
768+
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
769+
while (!AccessesToCheck.empty()) {
770+
MemoryAccess *Current = AccessesToCheck.pop_back_val();
771+
auto SeenI = SeenAccesses.insert(Current);
772+
if (!SeenI.second || !Seen.contains(Current->getBlock()))
773+
continue;
743774

744-
// Bail out if exceeded the threshold.
745-
if (SeenAccesses.size() >= MSSAThreshold)
746-
return false;
775+
// Bail out if exceeded the threshold.
776+
if (SeenAccesses.size() >= MSSAThreshold)
777+
return {};
747778

748-
// MemoryUse are read-only accesses.
749-
if (isa<MemoryUse>(Current))
750-
continue;
779+
// MemoryUse are read-only accesses.
780+
if (isa<MemoryUse>(Current))
781+
continue;
751782

752-
// For a MemoryDef, check if is aliases any of the location feeding
753-
// the original condition.
754-
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
755-
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
756-
return isModSet(
757-
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
758-
}))
759-
return false;
760-
}
783+
// For a MemoryDef, check if it aliases any of the locations feeding
784+
// the original condition.
785+
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
786+
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
787+
return isModSet(
788+
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
789+
}))
790+
return {};
791+
}
792+
793+
for (Use &U : Current->uses())
794+
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
795+
}
761796

762-
for (Use &U : Current->uses())
763-
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
797+
// We could also allow loops with known trip counts without mustprogress,
798+
// but ScalarEvolution may not be available.
799+
Info.PathIsNoop &=
800+
L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
801+
802+
// If the path is considered a no-op so far, check if it reaches a
803+
// single exit block without any phis. This ensures no values from the
804+
// loop are used outside of the loop.
805+
if (Info.PathIsNoop) {
806+
for (auto *Exiting : ExitingBlocks) {
807+
if (!Seen.contains(Exiting))
808+
continue;
809+
for (auto *Succ : successors(Exiting)) {
810+
if (L->contains(Succ))
811+
continue;
812+
813+
Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
814+
(!Info.ExitForPath || Info.ExitForPath == Succ);
815+
if (!Info.PathIsNoop)
816+
break;
817+
assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
818+
"cannot have multiple exit blocks");
819+
Info.ExitForPath = Succ;
764820
}
821+
}
822+
}
823+
if (!Info.ExitForPath)
824+
Info.PathIsNoop = false;
765825

766-
return true;
767-
};
826+
Info.InstToDuplicate = InstToDuplicate;
827+
return Info;
828+
};
768829

769830
// If we branch to the same successor, partial unswitching will not be
770831
// beneficial.
771832
if (TI->getSuccessor(0) == TI->getSuccessor(1))
772833
return {};
773834

774-
if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
775-
return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
776-
if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
777-
return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
835+
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
836+
AccessesToCheck)) {
837+
Info->KnownValue = ConstantInt::getTrue(TI->getContext());
838+
return Info;
839+
}
840+
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
841+
AccessesToCheck)) {
842+
Info->KnownValue = ConstantInt::getFalse(TI->getContext());
843+
return Info;
844+
}
778845

779846
return {};
780847
}
@@ -986,17 +1053,56 @@ bool LoopUnswitch::processCurrentLoop() {
9861053
// metadata, to avoid unswitching the same loop multiple times.
9871054
if (MSSA &&
9881055
!findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
989-
auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
990-
if (!ToDuplicate.first.empty()) {
1056+
if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) {
1057+
assert(!Info->InstToDuplicate.empty() &&
1058+
"need at least a partially invariant condition");
9911059
LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
992-
<< *ToDuplicate.first[0] << "\n");
993-
++NumBranches;
994-
unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
995-
CurrentLoop->getHeader()->getTerminator(),
996-
ToDuplicate.first);
1060+
<< *Info->InstToDuplicate[0] << "\n");
1061+
1062+
Instruction *TI = CurrentLoop->getHeader()->getTerminator();
1063+
Value *LoopCond = Info->InstToDuplicate[0];
1064+
1065+
// If the partially unswitched path is a no-op and has a single exit
1066+
// block, we do not need to do full unswitching. Instead, we can directly
1067+
// branch to the exit.
1068+
// TODO: Instead of duplicating the checks, we could also just directly
1069+
// branch to the exit from the conditional branch in the loop.
1070+
if (Info->PathIsNoop) {
1071+
if (HasBranchDivergence &&
1072+
getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
1073+
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
1074+
<< CurrentLoop->getHeader()->getName()
1075+
<< " at non-trivial condition '"
1076+
<< *Info->KnownValue << "' == " << *LoopCond << "\n"
1077+
<< ". Condition is divergent.\n");
1078+
return false;
1079+
}
9971080

998-
RedoLoop = false;
999-
return true;
1081+
++NumBranches;
1082+
1083+
BasicBlock *TrueDest = LoopHeader;
1084+
BasicBlock *FalseDest = Info->ExitForPath;
1085+
if (Info->KnownValue->isOneValue())
1086+
std::swap(TrueDest, FalseDest);
1087+
1088+
auto *OldBr =
1089+
cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
1090+
emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
1091+
FalseDest, OldBr, TI,
1092+
Info->InstToDuplicate);
1093+
delete OldBr;
1094+
RedoLoop = false;
1095+
return true;
1096+
}
1097+
1098+
// Otherwise, the path is not a no-op. Run regular unswitching.
1099+
if (unswitchIfProfitable(LoopCond, Info->KnownValue,
1100+
CurrentLoop->getHeader()->getTerminator(),
1101+
Info->InstToDuplicate)) {
1102+
++NumBranches;
1103+
RedoLoop = false;
1104+
return true;
1105+
}
10001106
}
10011107
}
10021108

@@ -1114,12 +1220,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
11141220

11151221
Loop *L = LI->getLoopFor(I->getParent());
11161222
auto *DefiningAccess = MemA->getDefiningAccess();
1117-
// If the defining access is a MemoryPhi in the header, get the incoming
1118-
// value for the pre-header as defining access.
1119-
if (DefiningAccess->getBlock() == I->getParent()) {
1223+
// Get the first defining access before the loop.
1224+
while (L->contains(DefiningAccess->getBlock())) {
1225+
// If the defining access is a MemoryPhi, get the incoming
1226+
// value for the pre-header as defining access.
11201227
if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
11211228
DefiningAccess =
11221229
MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
1230+
} else {
1231+
DefiningAccess =
1232+
cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
11231233
}
11241234
}
11251235
MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),

0 commit comments

Comments
 (0)