Skip to content

Commit 64b83e5

Browse files
committed
[VPlan] Pass VF as operand to VPWidenPointerInductionRecipe
Similarly to VPWidenIntOrFpInductionRecipe, if we want to support VPWidenPointerInductionRecipe in EVL tail folding, we need to increment the induction by EVL steps instead of VF steps. Currently, however, the VF is hard-wired in VPWidenPointerInductionRecipe. This adds an operand for the VF and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line.
1 parent 2806705 commit 64b83e5

File tree

6 files changed

+30
-36
lines changed

6 files changed

+30
-36
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8460,7 +8460,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
84608460
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
84618461
*PSE.getSE());
84628462
return new VPWidenPointerInductionRecipe(
8463-
Phi, Operands[0], Step, *II,
8463+
Phi, Operands[0], Step, &Plan.getVF(), *II,
84648464
LoopVectorizationPlanner::getDecisionAndClampRange(
84658465
[&](ElementCount VF) {
84668466
return CM.isScalarAfterVectorization(Phi, VF);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,25 +2213,28 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
22132213
};
22142214

22152215
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
2216-
public VPUnrollPartAccessor<3> {
2216+
public VPUnrollPartAccessor<4> {
22172217
bool IsScalarAfterVectorization;
22182218

22192219
public:
22202220
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
22212221
/// Start.
22222222
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2223-
const InductionDescriptor &IndDesc,
2223+
VPValue *VF, const InductionDescriptor &IndDesc,
22242224
bool IsScalarAfterVectorization, DebugLoc DL)
22252225
: VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
22262226
Step, IndDesc, DL),
2227-
IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2227+
IsScalarAfterVectorization(IsScalarAfterVectorization) {
2228+
addOperand(VF);
2229+
}
22282230

22292231
~VPWidenPointerInductionRecipe() override = default;
22302232

22312233
VPWidenPointerInductionRecipe *clone() override {
22322234
return new VPWidenPointerInductionRecipe(
22332235
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2234-
getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2236+
getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2237+
getDebugLoc());
22352238
}
22362239

22372240
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2246,7 +2249,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
22462249
/// the first unrolled part, if it exists. Returns itself if unrolling did not
22472250
/// take place.
22482251
VPValue *getFirstUnrolledPartOperand() {
2249-
return getUnrollPart(*this) == 0 ? this : getOperand(2);
2252+
return getUnrollPart(*this) == 0 ? this : getOperand(3);
22502253
}
22512254

22522255
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3184,7 +3184,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
31843184
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
31853185
Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
31863186
Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3187-
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3187+
Value *RuntimeVF = State.get(getOperand(2), true);
31883188
// Add induction update using an incorrect block temporarily. The phi node
31893189
// will be fixed after VPlan execution. Note that at this point the latch
31903190
// block cannot be used, as it does not exist yet.
@@ -3229,19 +3229,21 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
32293229
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
32303230
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
32313231
VPSlotTracker &SlotTracker) const {
3232-
assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3232+
assert((getNumOperands() == 3 || getNumOperands() == 5) &&
32333233
"unexpected number of operands");
32343234
O << Indent << "EMIT ";
32353235
printAsOperand(O, SlotTracker);
32363236
O << " = WIDEN-POINTER-INDUCTION ";
32373237
getStartValue()->printAsOperand(O, SlotTracker);
32383238
O << ", ";
32393239
getStepValue()->printAsOperand(O, SlotTracker);
3240-
if (getNumOperands() == 4) {
3241-
O << ", ";
3242-
getOperand(2)->printAsOperand(O, SlotTracker);
3240+
O << ", ";
3241+
getOperand(2)->printAsOperand(O, SlotTracker);
3242+
if (getNumOperands() == 5) {
32433243
O << ", ";
32443244
getOperand(3)->printAsOperand(O, SlotTracker);
3245+
O << ", ";
3246+
getOperand(4)->printAsOperand(O, SlotTracker);
32453247
}
32463248
}
32473249
#endif

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
99
; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
1010
;
1111
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
12+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
1213
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1314
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
1415
; CHECK-NEXT: Live-in ir<%N> = original trip-count
@@ -22,7 +23,7 @@ target triple = "aarch64-unknown-linux-gnu"
2223
; CHECK-NEXT: <x1> vector loop: {
2324
; CHECK-NEXT: vector.body:
2425
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
25-
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1>
26+
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1>, vp<[[VF]]>
2627
; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8>
2728
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>
2829
; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]>
@@ -64,11 +65,9 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
6465
; CHECK: vector.body:
6566
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
6667
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
67-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
68-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
69-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
68+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
7069
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
71-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
70+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
7271
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
7372
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
7473
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -157,11 +156,9 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
157156
; CHECK: vector.body:
158157
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
159158
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
160-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
161-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
162-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
159+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
163160
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
164-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
161+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
165162
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
166163
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
167164
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
240240
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
241241
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
242242
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
243-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
244-
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
243+
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
245244
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
246245
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
247246
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
@@ -315,8 +314,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
315314
; CHECK: vector.body:
316315
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
317316
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
318-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
319-
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
317+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
320318
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
321319
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
322320
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,9 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
170170
; CHECK: vector.body:
171171
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
172172
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
173-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
174-
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
175-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
173+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 1
176174
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
177-
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
175+
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0
178176
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
179177
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
180178
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -742,11 +740,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
742740
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
743741
; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
744742
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
745-
; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
746-
; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
747-
; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
743+
; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 1
748744
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
749-
; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
745+
; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0
750746
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
751747
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
752748
; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -755,11 +751,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
755751
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
756752
; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
757753
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
758-
; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
759-
; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
760-
; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
754+
; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP13]], 1
761755
; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
762-
; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
756+
; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0
763757
; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
764758
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
765759
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()

0 commit comments

Comments
 (0)