Skip to content

Commit 5aa2c1b

Browse files
m-guptaFerdinand Lemaire
authored andcommitted
Revert "[VPlan] Switch to checking sinking legality for recurrences in VPlan."
This reverts commit 7fc0b30. Causes a clang hang when building xz utils, github issue llvm#62187.
1 parent e6b828c commit 5aa2c1b

File tree

9 files changed

+121
-110
lines changed

9 files changed

+121
-110
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,14 @@ class RecurrenceDescriptor {
186186
/// previous iteration (e.g. if the value is defined in the previous
187187
/// iteration, we refer to it as first-order recurrence, if it is defined in
188188
/// the iteration before the previous, we refer to it as second-order
189-
/// recurrence and so on). Note that this function optimistically assumes that
190-
/// uses of the recurrence can be re-ordered if necessary and users need to
191-
/// check and perform the re-ordering.
192-
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
193-
DominatorTree *DT);
189+
/// recurrence and so on). \p SinkAfter includes pairs of instructions where
190+
/// the first will be rescheduled to appear after the second if/when the loop
191+
/// is vectorized. It may be augmented with additional pairs if needed in
192+
/// order to handle Phi as a first-order recurrence.
193+
static bool
194+
isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
195+
MapVector<Instruction *, Instruction *> &SinkAfter,
196+
DominatorTree *DT);
194197

195198
RecurKind getRecurrenceKind() const { return Kind; }
196199

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,10 @@ class LoopVectorizationLegality {
516516
/// Holds the phi nodes that are fixed-order recurrences.
517517
RecurrenceSet FixedOrderRecurrences;
518518

519+
/// Holds instructions that need to sink past other instructions to handle
520+
/// fixed-order recurrences.
521+
MapVector<Instruction *, Instruction *> SinkAfter;
522+
519523
/// Holds the widest induction type encountered.
520524
Type *WidestIndTy = nullptr;
521525

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -927,8 +927,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
927927
return false;
928928
}
929929

930-
bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
931-
DominatorTree *DT) {
930+
bool RecurrenceDescriptor::isFixedOrderRecurrence(
931+
PHINode *Phi, Loop *TheLoop,
932+
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
932933

933934
// Ensure the phi node is in the loop header and has two incoming values.
934935
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -964,7 +965,8 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
964965
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
965966
}
966967

967-
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
968+
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
969+
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
968970
return false;
969971

970972
// Ensure every user of the phi node (recursively) is dominated by the
@@ -973,9 +975,23 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
973975
// loop.
974976
// TODO: Consider extending this sinking to handle memory instructions.
975977

978+
// We optimistically assume we can sink all users after Previous. Keep a set
979+
// of instructions to sink after Previous ordered by dominance in the common
980+
// basic block. It will be applied to SinkAfter if all users can be sunk.
981+
auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
982+
return A->comesBefore(B);
983+
};
984+
std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
985+
CompareByComesBefore);
986+
976987
BasicBlock *PhiBB = Phi->getParent();
977988
SmallVector<Instruction *, 8> WorkList;
978989
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
990+
// Already sunk SinkCandidate.
991+
if (SinkCandidate->getParent() == PhiBB &&
992+
InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
993+
return true;
994+
979995
// Cyclic dependence.
980996
if (Previous == SinkCandidate)
981997
return false;
@@ -989,12 +1005,55 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
9891005
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
9901006
return false;
9911007

1008+
// Avoid sinking an instruction multiple times (if multiple operands are
1009+
// fixed order recurrences) by sinking once - after the latest 'previous'
1010+
// instruction.
1011+
auto It = SinkAfter.find(SinkCandidate);
1012+
if (It != SinkAfter.end()) {
1013+
auto *OtherPrev = It->second;
1014+
// Find the earliest entry in the 'sink-after' chain. The last entry in
1015+
// the chain is the original 'Previous' for a recurrence handled earlier.
1016+
auto EarlierIt = SinkAfter.find(OtherPrev);
1017+
while (EarlierIt != SinkAfter.end()) {
1018+
Instruction *EarlierInst = EarlierIt->second;
1019+
EarlierIt = SinkAfter.find(EarlierInst);
1020+
// Bail out if order has not been preserved.
1021+
if (EarlierIt != SinkAfter.end() &&
1022+
!DT->dominates(EarlierInst, OtherPrev))
1023+
return false;
1024+
OtherPrev = EarlierInst;
1025+
}
1026+
// Bail out if order has not been preserved.
1027+
if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
1028+
return false;
1029+
1030+
// SinkCandidate is already being sunk after an instruction after
1031+
// Previous. Nothing left to do.
1032+
if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
1033+
return true;
1034+
1035+
// If there are other instructions to be sunk after SinkCandidate, remove
1036+
// and re-insert SinkCandidate can break those instructions. Bail out for
1037+
// simplicity.
1038+
if (any_of(SinkAfter,
1039+
[SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
1040+
return P.second == SinkCandidate;
1041+
}))
1042+
return false;
1043+
1044+
// Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
1045+
// re-sunk to Previous, instead of sinking to OtherPrev. Remove
1046+
// SinkCandidate from SinkAfter to ensure it's insert position is updated.
1047+
SinkAfter.erase(SinkCandidate);
1048+
}
1049+
9921050
// If we reach a PHI node that is not dominated by Previous, we reached a
9931051
// header PHI. No need for sinking.
9941052
if (isa<PHINode>(SinkCandidate))
9951053
return true;
9961054

9971055
// Sink User tentatively and check its users
1056+
InstrsToSink.insert(SinkCandidate);
9981057
WorkList.push_back(SinkCandidate);
9991058
return true;
10001059
};
@@ -1009,6 +1068,11 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
10091068
}
10101069
}
10111070

1071+
// We can sink all users of Phi. Update the mapping.
1072+
for (Instruction *I : InstrsToSink) {
1073+
SinkAfter[I] = Previous;
1074+
Previous = I;
1075+
}
10121076
return true;
10131077
}
10141078

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
743743
continue;
744744
}
745745

746-
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
746+
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
747+
SinkAfter, DT)) {
747748
AllowedExit.insert(Phi);
748749
FixedOrderRecurrences.insert(Phi);
749750
continue;
@@ -916,6 +917,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
916917
}
917918
}
918919

920+
// For fixed order recurrences, we use the previous value (incoming value from
921+
// the latch) to check if it dominates all users of the recurrence. Bail out
922+
// if we have to sink such an instruction for another recurrence, as the
923+
// dominance requirement may not hold after sinking.
924+
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
925+
if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
926+
Instruction *V =
927+
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
928+
return SinkAfter.contains(V);
929+
}))
930+
return false;
931+
919932
// Now we know the widest induction type, check if our found induction
920933
// is the same size. If it's not, unset it here and InnerLoopVectorizer
921934
// will create another.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9055,8 +9055,7 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
90559055

90569056
// Sink users of fixed-order recurrence past the recipe defining the previous
90579057
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
9058-
if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
9059-
return std::nullopt;
9058+
VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
90609059

90619060
// Interleave memory: for each Interleave Group we marked earlier as relevant
90629061
// for this VPlan, replace the Recipes widening its memory instructions with a

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -657,10 +657,9 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
657657
return VPDT.properlyDominates(ParentA, ParentB);
658658
}
659659

660-
/// Sink users of \p FOR after the recipe defining the previous value \p
661-
/// Previous of the recurrence. \returns true if all users of \p FOR could be
662-
/// re-arranged as needed or false if it is not possible.
663-
static bool
660+
// Sink users of \p FOR after the recipe defining the previous value \p Previous
661+
// of the recurrence.
662+
static void
664663
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
665664
VPRecipeBase *Previous,
666665
VPDominatorTree &VPDT) {
@@ -669,18 +668,15 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
669668
SmallPtrSet<VPRecipeBase *, 8> Seen;
670669
Seen.insert(Previous);
671670
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
672-
// The previous value must not depend on the users of the recurrence phi. In
673-
// that case, FOR is not a fixed order recurrence.
674-
if (SinkCandidate == Previous)
675-
return false;
676-
671+
assert(
672+
SinkCandidate != Previous &&
673+
"The previous value cannot depend on the users of the recurrence phi.");
677674
if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
678675
!Seen.insert(SinkCandidate).second ||
679676
properlyDominates(Previous, SinkCandidate, VPDT))
680-
return true;
677+
return;
681678

682679
WorkList.push_back(SinkCandidate);
683-
return true;
684680
};
685681

686682
// Recursively sink users of FOR after Previous.
@@ -691,8 +687,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
691687
"only recipes with a single defined value expected");
692688
for (VPUser *User : Current->getVPSingleValue()->users()) {
693689
if (auto *R = dyn_cast<VPRecipeBase>(User))
694-
if (!TryToPushSinkCandidate(R))
695-
return false;
690+
TryToPushSinkCandidate(R);
696691
}
697692
}
698693

@@ -709,10 +704,9 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
709704
SinkCandidate->moveAfter(Previous);
710705
Previous = SinkCandidate;
711706
}
712-
return true;
713707
}
714708

715-
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
709+
void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
716710
VPBuilder &Builder) {
717711
VPDominatorTree VPDT;
718712
VPDT.recalculate(Plan);
@@ -735,8 +729,7 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
735729
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
736730
}
737731

738-
if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
739-
return false;
732+
sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
740733

741734
// Introduce a recipe to combine the incoming and previous values of a
742735
// fixed-order recurrence.
@@ -755,5 +748,4 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
755748
// all users.
756749
RecurSplice->setOperand(0, FOR);
757750
}
758-
return true;
759751
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,7 @@ struct VPlanTransforms {
7777
/// to combine the value from the recurrence phis and previous values. The
7878
/// current implementation assumes all users can be sunk after the previous
7979
/// value, which is enforced by earlier legality checks.
80-
/// \returns true if all users of fixed-order recurrences could be re-arranged
81-
/// as needed or false if it is not possible. In the latter case, \p Plan is
82-
/// not valid.
83-
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
80+
static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
8481

8582
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
8683
/// resulting plan to \p BestVF and \p BestUF.

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -647,40 +647,12 @@ exit:
647647
ret ptr %for.1
648648
}
649649

650-
; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and
651-
; it needs to be sunk past the previous value for both recurrences.
652-
define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
653-
; CHECK-LABEL: @test_resinking_required(
654-
; CHECK: vector.body:
655-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
656-
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
657-
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
658-
; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
659-
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8
660-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
661-
; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
662-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
663-
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
664-
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8
665-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
666-
; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
667-
; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
668-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
669-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
670-
; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
671-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
672-
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
673-
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
674-
; CHECK: middle.block:
675-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
676-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
677-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
678-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
679-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
680-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
681-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
682-
; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph
683-
;
650+
; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in
651+
; loop vectorizer.
652+
define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
653+
; CHECK-LABEL: @test_crash
654+
; CHECK-NOT: vector.body:
655+
; CHECK: ret
684656
Entry:
685657
br label %Loop
686658

@@ -689,7 +661,7 @@ Loop:
689661
%for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
690662
%for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
691663
%iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
692-
%USE_2_FORS = fdiv double %for.3, %for.1
664+
%USE_2_INDVARS = fdiv double %for.3, %for.1
693665
%div = fdiv double 0.000000e+00, %for.1
694666
%l1 = load double, ptr %a, align 8
695667
%iv.next= add nuw nsw i64 %iv, 1

0 commit comments

Comments
 (0)