Skip to content

Commit 48dcf3b

Browse files
committed
[VPlan] Pass VF as operand to VPWidenPointerInductionRecipe
As with VPWidenIntOrFpInductionRecipe, supporting VPWidenPointerInductionRecipe in EVL tail folding requires incrementing the induction by EVL steps instead of VF steps, but the VF is currently hard-wired in VPWidenPointerInductionRecipe. This commit adds an operand for the VF and plumbs it through, so that we can swap it out in VPlanTransforms::tryAddExplicitVectorLength further down the line.
1 parent 4746395 commit 48dcf3b

File tree

6 files changed

+27
-35
lines changed

6 files changed

+27
-35
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8461,7 +8461,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
84618461
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
84628462
*PSE.getSE());
84638463
return new VPWidenPointerInductionRecipe(
8464-
Phi, Operands[0], Step, *II,
8464+
Phi, Operands[0], Step, &Plan.getVF(), *II,
84658465
LoopVectorizationPlanner::getDecisionAndClampRange(
84668466
[&](ElementCount VF) {
84678467
return CM.isScalarAfterVectorization(Phi, VF);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2201,7 +2201,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
22012201
};
22022202

22032203
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
2204-
public VPUnrollPartAccessor<3> {
2204+
public VPUnrollPartAccessor<4> {
22052205
const InductionDescriptor &IndDesc;
22062206

22072207
bool IsScalarAfterVectorization;
@@ -2210,21 +2210,22 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
22102210
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
22112211
/// Start.
22122212
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2213-
const InductionDescriptor &IndDesc,
2213+
VPValue *VF, const InductionDescriptor &IndDesc,
22142214
bool IsScalarAfterVectorization, DebugLoc DL)
22152215
: VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi, nullptr, DL),
22162216
IndDesc(IndDesc),
22172217
IsScalarAfterVectorization(IsScalarAfterVectorization) {
22182218
addOperand(Start);
22192219
addOperand(Step);
2220+
addOperand(VF);
22202221
}
22212222

22222223
~VPWidenPointerInductionRecipe() override = default;
22232224

22242225
VPWidenPointerInductionRecipe *clone() override {
22252226
return new VPWidenPointerInductionRecipe(
22262227
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2227-
IndDesc, IsScalarAfterVectorization, getDebugLoc());
2228+
getOperand(2), IndDesc, IsScalarAfterVectorization, getDebugLoc());
22282229
}
22292230

22302231
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2242,7 +2243,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
22422243
/// the first unrolled part, if it exists. Returns itself if unrolling did not
22432244
/// take place.
22442245
VPValue *getFirstUnrolledPartOperand() {
2245-
return getUnrollPart(*this) == 0 ? this : getOperand(2);
2246+
return getUnrollPart(*this) == 0 ? this : getOperand(3);
22462247
}
22472248

22482249
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3182,7 +3182,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
31823182
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
31833183
Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
31843184
Type *PhiType = State.TypeAnalysis.inferScalarType(getOperand(1));
3185-
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3185+
Value *RuntimeVF = State.get(getOperand(2), true);
31863186
// Add induction update using an incorrect block temporarily. The phi node
31873187
// will be fixed after VPlan execution. Note that at this point the latch
31883188
// block cannot be used, as it does not exist yet.
@@ -3227,19 +3227,21 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
32273227
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
32283228
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
32293229
VPSlotTracker &SlotTracker) const {
3230-
assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3230+
assert((getNumOperands() == 3 || getNumOperands() == 5) &&
32313231
"unexpected number of operands");
32323232
O << Indent << "EMIT ";
32333233
printAsOperand(O, SlotTracker);
32343234
O << " = WIDEN-POINTER-INDUCTION ";
32353235
getStartValue()->printAsOperand(O, SlotTracker);
32363236
O << ", ";
32373237
getOperand(1)->printAsOperand(O, SlotTracker);
3238-
if (getNumOperands() == 4) {
3239-
O << ", ";
3240-
getOperand(2)->printAsOperand(O, SlotTracker);
3238+
O << ", ";
3239+
getOperand(2)->printAsOperand(O, SlotTracker);
3240+
if (getNumOperands() == 5) {
32413241
O << ", ";
32423242
getOperand(3)->printAsOperand(O, SlotTracker);
3243+
O << ", ";
3244+
getOperand(4)->printAsOperand(O, SlotTracker);
32433245
}
32443246
}
32453247
#endif

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
99
; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
1010
;
1111
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
12+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
1213
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1314
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
1415
; CHECK-NEXT: Live-in ir<%N> = original trip-count
@@ -22,7 +23,7 @@ target triple = "aarch64-unknown-linux-gnu"
2223
; CHECK-NEXT: <x1> vector loop: {
2324
; CHECK-NEXT: vector.body:
2425
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
25-
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1>
26+
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1>, vp<[[VF]]>
2627
; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8>
2728
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>
2829
; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]>
@@ -64,11 +65,9 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
6465
; CHECK: vector.body:
6566
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
6667
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
67-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
68-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
69-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
68+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
7069
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
71-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
70+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
7271
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
7372
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
7473
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -157,11 +156,9 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
157156
; CHECK: vector.body:
158157
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
159158
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
160-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
161-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
162-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
159+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 1
163160
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
164-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
161+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
165162
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
166163
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
167164
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
240240
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
241241
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
242242
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
243-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
244-
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
243+
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
245244
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
246245
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
247246
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
@@ -315,8 +314,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
315314
; CHECK: vector.body:
316315
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
317316
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
318-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
319-
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
317+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
320318
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
321319
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
322320
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,9 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
170170
; CHECK: vector.body:
171171
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
172172
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
173-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
174-
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
175-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
173+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 1
176174
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
177-
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
175+
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0
178176
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
179177
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
180178
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -742,11 +740,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
742740
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
743741
; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
744742
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
745-
; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
746-
; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
747-
; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
743+
; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 1
748744
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
749-
; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
745+
; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0
750746
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
751747
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
752748
; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -755,11 +751,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
755751
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
756752
; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
757753
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
758-
; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
759-
; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
760-
; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
754+
; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP13]], 1
761755
; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
762-
; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
756+
; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0
763757
; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
764758
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
765759
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()

0 commit comments

Comments
 (0)