Skip to content

Commit 498aa53

Browse files
committed
[IVDescriptors] Add pointer InductionDescriptors with non-constant strides
This matches the handling for integer IVs. I left the non-opaque cases alone, mostly because they're largely irrelevant today. This doesn't actually make much difference in vectorization right now as we immediately fail on aliasing checks (which also bail on non-constant strides). Slightly surprisingly, it's the cases which *do* need runtime checks that work after this patch, as they don't use the same dependency analysis path. This will also enable non-constant stride pointer recurrences for other consumers. I've audited said code and don't see any obvious issues.
1 parent e8bc77e commit 498aa53

File tree

3 files changed

+90
-17
lines changed

3 files changed

+90
-17
lines changed

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,8 +1288,6 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
12881288
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
12891289
"Step value is zero");
12901290

1291-
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
1292-
"Step value should be constant for pointer induction");
12931291
assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
12941292
"StepValue is not an integer");
12951293

@@ -1570,15 +1568,25 @@ bool InductionDescriptor::isInductionPHI(
15701568
}
15711569

15721570
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1571+
PointerType *PtrTy = cast<PointerType>(PhiTy);
1572+
1573+
// Always use i8 element type for opaque pointer inductions.
1574+
// This allows induction variables w/non-constant steps.
1575+
if (PtrTy->isOpaque()) {
1576+
D = InductionDescriptor(StartValue, IK_PtrInduction, Step,
1577+
/* BinOp */ nullptr,
1578+
Type::getInt8Ty(PtrTy->getContext()));
1579+
return true;
1580+
}
1581+
15731582
// Pointer induction should be a constant.
1583+
// TODO: This could be generalized, but should probably just
1584+
// be dropped instead once the migration to opaque ptrs is
1585+
// complete.
15741586
if (!ConstStep)
15751587
return false;
15761588

1577-
// Always use i8 element type for opaque pointer inductions.
1578-
PointerType *PtrTy = cast<PointerType>(PhiTy);
1579-
Type *ElementType = PtrTy->isOpaque()
1580-
? Type::getInt8Ty(PtrTy->getContext())
1581-
: PtrTy->getNonOpaquePointerElementType();
1589+
Type *ElementType = PtrTy->getNonOpaquePointerElementType();
15821590
if (!ElementType->isSized())
15831591
return false;
15841592

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,8 +2492,6 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
24922492
return CreateAdd(StartValue, Offset);
24932493
}
24942494
case InductionDescriptor::IK_PtrInduction: {
2495-
assert(isa<Constant>(Step) &&
2496-
"Expected constant step for pointer induction");
24972495
return B.CreateGEP(ID.getElementType(), StartValue, CreateMul(Index, Step));
24982496
}
24992497
case InductionDescriptor::IK_FpInduction: {
@@ -8271,7 +8269,6 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
82718269
if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
82728270
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
82738271
*PSE.getSE());
8274-
assert(isa<SCEVConstant>(II->getStep()));
82758272
return new VPWidenPointerInductionRecipe(
82768273
Phi, Operands[0], Step, *II,
82778274
LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -9377,8 +9374,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
93779374
return;
93789375
}
93799376

9380-
assert(isa<SCEVConstant>(IndDesc.getStep()) &&
9381-
"Induction step not a SCEV constant!");
93829377
Type *PhiType = IndDesc.getStep()->getType();
93839378

93849379
// Build a pointer phi

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -539,19 +539,89 @@ exit:
539539
define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
540540
; CHECK-LABEL: @double_stride_ptr_iv(
541541
; CHECK-NEXT: entry:
542+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
543+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
544+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
545+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
546+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
547+
; CHECK: vector.scevcheck:
548+
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
549+
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
550+
; CHECK: vector.memcheck:
551+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 1027
552+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1027
553+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP1]]
554+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
555+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
556+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
557+
; CHECK: vector.ph:
558+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
559+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
560+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
561+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
562+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
563+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]]
564+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
565+
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP6]]
566+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
567+
; CHECK: vector.body:
568+
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
569+
; CHECK-NEXT: [[POINTER_PHI7:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND8:%.*]], [[VECTOR_BODY]] ]
570+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
571+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
572+
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
573+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
574+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[STRIDE]], [[TMP9]]
575+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
576+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
577+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
578+
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
579+
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP12]]
580+
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
581+
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
582+
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP13]], [[DOTSPLAT6]]
583+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
584+
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
585+
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
586+
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 1
587+
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[STRIDE]], [[TMP17]]
588+
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP16]], 0
589+
; CHECK-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP19]], i64 0
590+
; CHECK-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
591+
; CHECK-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
592+
; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT10]], [[TMP20]]
593+
; CHECK-NEXT: [[VECTOR_GEP13:%.*]] = mul <vscale x 4 x i64> [[TMP21]], [[DOTSPLAT6]]
594+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[POINTER_PHI7]], <vscale x 4 x i64> [[VECTOR_GEP13]]
595+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope !16
596+
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
597+
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP23]], <vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope !19, !noalias !16
598+
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
599+
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 4
600+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP25]]
601+
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
602+
; CHECK-NEXT: [[PTR_IND8]] = getelementptr i8, ptr [[POINTER_PHI7]], i64 [[TMP18]]
603+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
604+
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
605+
; CHECK: middle.block:
606+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
607+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
608+
; CHECK: scalar.ph:
609+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
610+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ], [ [[P]], [[VECTOR_MEMCHECK]] ]
611+
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[ENTRY]] ], [ [[P2]], [[VECTOR_SCEVCHECK]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ]
542612
; CHECK-NEXT: br label [[LOOP:%.*]]
543613
; CHECK: loop:
544-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
545-
; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
546-
; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[P2:%.*]], [[ENTRY]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
614+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
615+
; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
616+
; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
547617
; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
548618
; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
549619
; CHECK-NEXT: store i32 [[Y0]], ptr [[PTR2]], align 4
550-
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
620+
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE]]
551621
; CHECK-NEXT: [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
552622
; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1
553623
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
554-
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
624+
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
555625
; CHECK: exit:
556626
; CHECK-NEXT: ret void
557627
;

0 commit comments

Comments
 (0)