@@ -2974,7 +2974,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2974
2974
fixupIVUsers (Entry.first , Entry.second ,
2975
2975
getOrCreateVectorTripCount (nullptr ),
2976
2976
IVEndValues[Entry.first ], LoopMiddleBlock, State);
2977
- IVEndValues[Entry.first ], LoopMiddleBlock, Plan, State);
2978
2977
}
2979
2978
2980
2979
for (Instruction *PI : PredicatedInstructions)
@@ -8710,13 +8709,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8710
8709
// directly, enabling more efficient codegen.
8711
8710
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
8712
8711
} else if (Legal->isCSAPhi (Phi)) {
8713
- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8714
- VPValue *InitData = State->getVPInitData ();
8712
+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8713
+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8714
+
8715
+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8716
+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8717
+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8718
+
8715
8719
// When the VF=getFixed(1), InitData is just InitScalar.
8716
- if (!InitData)
8717
- InitData = State->getVPInitScalar ();
8720
+ VPValue *InitData =
8721
+ IsScalarVF ? InitScalar
8722
+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
8718
8723
PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8719
- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
8720
8724
} else {
8721
8725
llvm_unreachable (
8722
8726
" can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8757,13 +8761,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8757
8761
return CSADescriptor::isCSASelect (CSA.second , SI);
8758
8762
});
8759
8763
if (CSADescIt != Legal->getCSAs ().end ()) {
8760
- PHINode *CSAPhi = CSADescIt->first ;
8761
- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8762
- VPValue *VPDataPhi = State->getPhiRecipe ();
8763
- auto *R = new VPCSADataUpdateRecipe (
8764
- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8765
- State->setDataUpdate (R);
8766
- return R;
8764
+ for (VPRecipeBase &R :
8765
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8766
+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8767
+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8768
+ auto *R = new VPCSADataUpdateRecipe (
8769
+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8770
+ PhiR->setDataUpdate (R);
8771
+ return R;
8772
+ }
8773
+ }
8774
+ }
8767
8775
}
8768
8776
8769
8777
return new VPWidenSelectRecipe (
@@ -8778,44 +8786,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8778
8786
return tryToWiden (Instr, Operands, VPBB);
8779
8787
}
8780
8788
8781
- // / Add CSA Recipes that can occur before each instruction in the input IR
8782
- // / is processed and introduced into VPlan.
8783
- static void
8784
- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8785
- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8786
- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8787
- VPlan &Plan, VPRecipeBuilder &Builder) {
8788
-
8789
- // Don't build full CSA for VF=ElementCount::getFixed(1)
8790
- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8791
- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8792
-
8793
- for (const auto &CSA : CSAs) {
8794
- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8795
- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8796
-
8797
- // Scalar VF builds the scalar version of the loop. In that case,
8798
- // no maintenence of mask nor extraction in middle block is needed.
8799
- if (IsScalarVF) {
8800
- VPCSAState *S = new VPCSAState (VPInitScalar);
8801
- Plan.addCSAState (CSA.first , S);
8802
- continue ;
8803
- }
8804
-
8805
- VPBuilder PHB (PreheaderVPBB);
8806
- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8807
- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8808
- auto *VPInitData =
8809
- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8810
-
8811
- VPBuilder HB (HeaderVPBB);
8812
- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8813
-
8814
- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8815
- Plan.addCSAState (CSA.first , S);
8816
- }
8817
- }
8818
-
8819
8789
// / Add CSA Recipes that must occur after each instruction in the input IR
8820
8790
// / is processed and introduced into VPlan.
8821
8791
static void
@@ -8828,60 +8798,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
8828
8798
[&](ElementCount VF) { return VF.isScalar (); }, Range))
8829
8799
return ;
8830
8800
8801
+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8831
8802
for (const auto &CSA : CSAs) {
8832
- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8833
- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8803
+ // Build the MaskPhi recipe.
8804
+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8805
+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8806
+ VPBuilder B;
8807
+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8808
+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8809
+ B.clearInsertionPoint ();
8834
8810
8835
- assert (VPDataUpdate &&
8836
- " VPDataUpdate must have been introduced prior to postprocess" );
8837
- assert (CSA.second .getCond () &&
8838
- " CSADescriptor must know how to describe the condition" );
8839
8811
auto GetVPValue = [&](Value *I) {
8840
8812
return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
8841
8813
};
8842
- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8843
- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8814
+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8815
+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
8844
8816
8845
8817
// The CSA optimization wants to use a condition such that when it is
8846
8818
// true, a new value is assigned. However, it is possible that a true lane
8847
8819
// in WidenedCond corresponds to selection of the initial value instead.
8848
8820
// In that case, we must use the negation of WidenedCond.
8849
8821
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8822
+ assert (CSA.second .getCond () &&
8823
+ " CSADescriptor must know how to describe the condition" );
8824
+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
8850
8825
VPValue *CondToUse = WidenedCond;
8851
- VPBuilder B;
8852
8826
if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
8853
8827
CSA.first ) {
8854
8828
auto *VPNotCond = B.createNot (WidenedCond, DL);
8855
- VPNotCond->insertBefore (
8856
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8829
+ VPNotCond->insertBefore (VPDataUpdate);
8857
8830
CondToUse = VPNotCond;
8858
8831
}
8859
8832
8860
- auto *VPAnyActive =
8861
- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8862
- VPAnyActive->insertBefore (
8863
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8833
+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8834
+ VPAnyActive->insertBefore (VPDataUpdate);
8864
8835
8865
- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8866
- VPAnyActive, DL, " csa.mask.sel" );
8836
+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8837
+ " csa.mask.sel" );
8867
8838
VPMaskSel->insertAfter (VPAnyActive);
8839
+
8868
8840
VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8841
+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8842
+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8843
+ SmallVector<PHINode *> PhiToFix;
8844
+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8845
+ if (auto *Phi = dyn_cast<PHINode>(U);
8846
+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8847
+ PhiToFix.emplace_back (Phi);
8869
8848
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8870
- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8871
-
8849
+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8850
+ PhiToFix);
8872
8851
MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8873
-
8874
- // Update CSAState with new recipes
8875
- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8876
- CSAState->setVPAnyActive (VPAnyActive);
8877
-
8878
- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8879
- // Phi users in the unique exit block of the original updated value.
8880
- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8881
- assert (OrigExit && " Expected a single exit block" );
8882
- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8883
- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8884
- Plan.addLiveOut (Phi, ExtractScalarRecipe);
8885
8852
}
8886
8853
}
8887
8854
@@ -9199,11 +9166,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9199
9166
9200
9167
VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
9201
9168
9202
- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9203
- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9204
- Range, *Plan, RecipeBuilder);
9205
-
9206
-
9207
9169
// ---------------------------------------------------------------------------
9208
9170
// Pre-construction: record ingredients whose recipes we'll need to further
9209
9171
// process after constructing the initial VPlan.
0 commit comments