-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Pass NumUnrolledElems as operand to VPWidenPointerInductionRecipe. NFC #119859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Pass NumUnrolledElems as operand to VPWidenPointerInductionRecipe. NFC #119859
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesSimilarly to VPWidenIntOrFpInductionRecipe, if we want to support it in EVL tail folding we need to increment the induction by EVL steps instead of VF steps, but currently the VF is hard-wired in VPWidenPointerInductionRecipe. This adds an operand for the VF and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line. Full diff: https://github.com/llvm/llvm-project/pull/119859.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index de164ee434d647..126d1c66568aaa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8429,7 +8429,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
*PSE.getSE());
return new VPWidenPointerInductionRecipe(
- Phi, Operands[0], Step, *II,
+ Phi, Operands[0], Step, &Plan.getVF(), *II,
LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
return CM.isScalarAfterVectorization(Phi, VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8794517b777f3b..77d68dd1bdc1ad 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2188,7 +2188,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
};
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
- public VPUnrollPartAccessor<3> {
+ public VPUnrollPartAccessor<4> {
const InductionDescriptor &IndDesc;
bool IsScalarAfterVectorization;
@@ -2197,13 +2197,14 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc,
+ VPValue *VF, const InductionDescriptor &IndDesc,
bool IsScalarAfterVectorization)
: VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
IndDesc(IndDesc),
IsScalarAfterVectorization(IsScalarAfterVectorization) {
addOperand(Start);
addOperand(Step);
+ addOperand(VF);
}
~VPWidenPointerInductionRecipe() override = default;
@@ -2211,7 +2212,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
- IndDesc, IsScalarAfterVectorization);
+ getOperand(2), IndDesc, IsScalarAfterVectorization);
}
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2229,7 +2230,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
/// the first unrolled part, if it exists. Returns itself if unrolling did not
/// take place.
VPValue *getFirstUnrolledPartOperand() {
- return getUnrollPart(*this) == 0 ? this : getOperand(2);
+ return getUnrollPart(*this) == 0 ? this : getOperand(3);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e882368544e815..2e69f6fab26615 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3176,7 +3176,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
Type *PhiType = IndDesc.getStep()->getType();
- Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
+ Value *RuntimeVF = State.get(getOperand(2), true);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
// block cannot be used, as it does not exist yet.
@@ -3221,18 +3221,19 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- assert((getNumOperands() == 2 || getNumOperands() == 4) &&
+ assert((getNumOperands() == 3 || getNumOperands() == 5) &&
"unexpected number of operands");
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
- O << ", " << *IndDesc.getStep();
- if (getNumOperands() == 4) {
- O << ", ";
- getOperand(2)->printAsOperand(O, SlotTracker);
+ O << ", " << *IndDesc.getStep() << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ if (getNumOperands() == 5) {
O << ", ";
getOperand(3)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(4)->printAsOperand(O, SlotTracker);
}
}
#endif
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index c3f753af6fb53c..ae6fb711c87c17 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
;
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<%N> = original trip-count
@@ -19,7 +20,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
+; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1, vp<[[VF]]>
; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8>
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>
; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]>
@@ -61,11 +62,9 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -154,11 +153,9 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 4b096e17a4fa0e..e232fea1b2f2e0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -240,8 +240,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
@@ -315,8 +314,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 822d5c8d169175..c441bf643471f4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -172,11 +172,9 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 1
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -745,11 +743,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
-; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
+; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 1
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
+; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -758,11 +754,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
-; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
-; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
+; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP13]], 1
; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
-; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
+; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
|
@llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangesSimilarly to VPWidenIntOrFpInductionRecipe, if we want to support it in EVL tail folding we need to increment the induction by EVL steps instead of VF steps, but currently the VF is hard-wired in VPWidenPointerInductionRecipe. This adds an operand for the VF and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line. Full diff: https://github.com/llvm/llvm-project/pull/119859.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index de164ee434d647..126d1c66568aaa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8429,7 +8429,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
*PSE.getSE());
return new VPWidenPointerInductionRecipe(
- Phi, Operands[0], Step, *II,
+ Phi, Operands[0], Step, &Plan.getVF(), *II,
LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
return CM.isScalarAfterVectorization(Phi, VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8794517b777f3b..77d68dd1bdc1ad 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2188,7 +2188,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
};
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
- public VPUnrollPartAccessor<3> {
+ public VPUnrollPartAccessor<4> {
const InductionDescriptor &IndDesc;
bool IsScalarAfterVectorization;
@@ -2197,13 +2197,14 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc,
+ VPValue *VF, const InductionDescriptor &IndDesc,
bool IsScalarAfterVectorization)
: VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
IndDesc(IndDesc),
IsScalarAfterVectorization(IsScalarAfterVectorization) {
addOperand(Start);
addOperand(Step);
+ addOperand(VF);
}
~VPWidenPointerInductionRecipe() override = default;
@@ -2211,7 +2212,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
- IndDesc, IsScalarAfterVectorization);
+ getOperand(2), IndDesc, IsScalarAfterVectorization);
}
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2229,7 +2230,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
/// the first unrolled part, if it exists. Returns itself if unrolling did not
/// take place.
VPValue *getFirstUnrolledPartOperand() {
- return getUnrollPart(*this) == 0 ? this : getOperand(2);
+ return getUnrollPart(*this) == 0 ? this : getOperand(3);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e882368544e815..2e69f6fab26615 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3176,7 +3176,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
Type *PhiType = IndDesc.getStep()->getType();
- Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
+ Value *RuntimeVF = State.get(getOperand(2), true);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
// block cannot be used, as it does not exist yet.
@@ -3221,18 +3221,19 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- assert((getNumOperands() == 2 || getNumOperands() == 4) &&
+ assert((getNumOperands() == 3 || getNumOperands() == 5) &&
"unexpected number of operands");
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
- O << ", " << *IndDesc.getStep();
- if (getNumOperands() == 4) {
- O << ", ";
- getOperand(2)->printAsOperand(O, SlotTracker);
+ O << ", " << *IndDesc.getStep() << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ if (getNumOperands() == 5) {
O << ", ";
getOperand(3)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(4)->printAsOperand(O, SlotTracker);
}
}
#endif
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index c3f753af6fb53c..ae6fb711c87c17 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
;
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<%N> = original trip-count
@@ -19,7 +20,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
+; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1, vp<[[VF]]>
; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8>
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>
; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]>
@@ -61,11 +62,9 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -154,11 +153,9 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 4b096e17a4fa0e..e232fea1b2f2e0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -240,8 +240,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
@@ -315,8 +314,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 822d5c8d169175..c441bf643471f4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -172,11 +172,9 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 1
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -745,11 +743,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
-; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
+; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 1
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
+; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -758,11 +754,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
-; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
-; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
+; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP13]], 1
; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
-; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
+; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
|
48dcf3b
to
64b83e5
Compare
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p | ||
/// Start. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extended the comment for the new parameter
Similarly to VPWidenIntOrFpInductionRecipe, if we want to support it in EVL tail folding we need to increment the induction by EVL steps instead of VF steps, but currently the VF is hard-wired in VPWidenPointerInductionRecipe. This adds an operand for the VF and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line.
64b83e5
to
21a2bb7
Compare
…ze/VPWidenPointerInductionRecipe-vp-operand
Ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/14877 Here is the relevant piece of the build log for the reference
|
Similarly to VPWidenIntOrFpInductionRecipe, if we want to support it in EVL tail folding we need to increment the induction by EVL steps instead of VF*UF steps, but currently this is hard-wired in VPWidenPointerInductionRecipe.
This adds an operand for the number of elements unrolled and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line.