
Commit d120c79

[VPlan] Support VPReverseVectorPointer in DataWithEVL vectorization
1 parent 562c93a · commit d120c79

4 files changed (+53, -8 lines)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 0 deletions
@@ -1445,6 +1445,14 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(CanonicalIVType);
   LLVMContext &Ctx = CanonicalIVType->getContext();
   SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
+
+  for (VPUser *U : Plan.getVF().users()) {
+    auto *CurRecipe = cast<VPRecipeBase>(U);
+
+    if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(CurRecipe))
+      R->setOperand(1, &EVL);
+  }
+
   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
     for (VPUser *U : collectUsersRecursively(HeaderMask)) {
       auto *CurRecipe = cast<VPRecipeBase>(U);
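Not part of the diff: the recipe being retargeted here computes the start address of a reversed access from an element count, which is why operand 1 must become the per-iteration EVL rather than the full VF once the loop is tail-folded. A rough LLVM IR sketch of that addressing pattern, mirroring the updated RISC-V checks further down (function and value names are hypothetical):

; Sketch only: a reversed vector pointer formed from the element count %evl.
define ptr @reverse_pointer_sketch(ptr %base, i32 %evl) {
  %evl.zext = zext i32 %evl to i64
  %off.mul = mul i64 0, %evl.zext                              ; 0 * count, evaluates to 0 here
  %off.sub = sub i64 1, %evl.zext                              ; 1 - count, i.e. step back (count - 1) elements
  %p0 = getelementptr inbounds i32, ptr %base, i64 %off.mul
  %p1 = getelementptr inbounds i32, ptr %p0, i64 %off.sub      ; lowest-addressed element of the reversed group
  ret ptr %p1
}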

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 2 deletions
@@ -143,8 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
           .Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
             return VerifyEVLUse(*S, 2);
           })
-          .Case<VPWidenLoadEVLRecipe>([&](const VPWidenLoadEVLRecipe *L) {
-            return VerifyEVLUse(*L, 1);
+          .Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
+              [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1);
           })
           .Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
             return VerifyEVLUse(

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 40 additions & 4 deletions
@@ -49,9 +49,26 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP10]]
+; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP11]]
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP16]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]]
+; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP19]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL: scalar.ph:
@@ -163,9 +180,28 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP17]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP18]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP19]]
+; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 0, [[TMP23]]
+; IF-EVL-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP23]]
+; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP24]]
+; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP25]]
+; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP27]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
+; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL: scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll

Lines changed: 3 additions & 2 deletions
@@ -38,8 +38,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]]
-; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]]
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP15]]
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
 ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
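The new zext in this test follows from the same operand swap: the reverse-pointer offsets are now computed from the i32 EVL ([[TMP11]]) zero-extended to i64, where they previously used [[TMP9]]. A minimal LLVM IR sketch of just that changed step (function and value names are hypothetical):

; Sketch only: the i32 EVL is widened to i64 before the reverse-offset arithmetic.
define i64 @reverse_offset_sketch(i32 %evl) {
  %evl.zext = zext i32 %evl to i64   ; the extra zext seen in the updated checks
  %off = sub i64 1, %evl.zext        ; 1 - EVL, the backwards step in elements
  ret i64 %off
}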
