Skip to content

Commit 3e767a4

Browse files
committed
[RISCV] Allow non-loop-invariant steps in RISCVGatherScatterLowering
The motivation for this is to allow us to match strided accesses that are emitted from the loop vectorizer with EVL tail folding (see #122232). In these loops the step isn't loop-invariant and is derived from @llvm.experimental.get.vector.length. We can relax the loop-invariance restriction as long as we make sure to construct the step updates after the step's definition inside the loop, instead of in the preheader. I presume the restriction was previously added so that the step would dominate the insertion point in the preheader. I can't think of why it wouldn't be safe to calculate it in the loop otherwise.
1 parent cd92763 commit 3e767a4

File tree

3 files changed

+49
-54
lines changed

3 files changed

+49
-54
lines changed

llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,10 +211,6 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
211211
assert(Phi->getIncomingValue(IncrementingBlock) == Inc &&
212212
"Expected one operand of phi to be Inc");
213213

214-
// Only proceed if the step is loop invariant.
215-
if (!L->isLoopInvariant(Step))
216-
return false;
217-
218214
// Step should be a splat.
219215
Step = getSplatValue(Step);
220216
if (!Step)
@@ -310,18 +306,31 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
310306
}
311307
case Instruction::Mul: {
312308
Start = Builder.CreateMul(Start, SplatOp, "start");
313-
Step = Builder.CreateMul(Step, SplatOp, "step");
314309
Stride = Builder.CreateMul(Stride, SplatOp, "stride");
315310
break;
316311
}
317312
case Instruction::Shl: {
318313
Start = Builder.CreateShl(Start, SplatOp, "start");
319-
Step = Builder.CreateShl(Step, SplatOp, "step");
320314
Stride = Builder.CreateShl(Stride, SplatOp, "stride");
321315
break;
322316
}
323317
}
324318

319+
// Adjust the step value after its definition if it's an instruction.
320+
if (auto *StepI = dyn_cast<Instruction>(Step))
321+
Builder.SetInsertPoint(*StepI->getInsertionPointAfterDef());
322+
323+
switch (BO->getOpcode()) {
324+
default:
325+
break;
326+
case Instruction::Mul:
327+
Step = Builder.CreateMul(Step, SplatOp, "step");
328+
break;
329+
case Instruction::Shl:
330+
Step = Builder.CreateShl(Step, SplatOp, "step");
331+
break;
332+
}
333+
325334
Inc->setOperand(StepIndex, Step);
326335
BasePtr->setIncomingValue(StartBlock, Start);
327336
return true;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,8 @@ for.cond.cleanup: ; preds = %vector.body
320320
define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %shift) {
321321
; CHECK-LABEL: @gather_unknown_pow2(
322322
; CHECK-NEXT: entry:
323-
; CHECK-NEXT: [[STEP:%.*]] = shl i64 8, [[SHIFT:%.*]]
324-
; CHECK-NEXT: [[STRIDE:%.*]] = shl i64 1, [[SHIFT]]
323+
; CHECK-NEXT: [[STRIDE:%.*]] = shl i64 1, [[SHIFT:%.*]]
324+
; CHECK-NEXT: [[STEP:%.*]] = shl i64 8, [[SHIFT]]
325325
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STRIDE]], 4
326326
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
327327
; CHECK: vector.body:

llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll

Lines changed: 32 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -109,21 +109,20 @@ define <vscale x 1 x i64> @gather_non_invariant_step(ptr %a, ptr %b, i32 %len) {
109109
; CHECK-LABEL: @gather_non_invariant_step(
110110
; CHECK-NEXT: vector.ph:
111111
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
112-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
113112
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
114113
; CHECK: vector.body:
115114
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
116-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
115+
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
117116
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
118-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
119-
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP1]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
117+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
118+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
119+
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
120+
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
120121
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
121122
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
122123
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
123124
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
124-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[STEP]], i64 0
125-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
126-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
125+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
127126
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
128127
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
129128
; CHECK: for.cond.cleanup:
@@ -159,23 +158,21 @@ define <vscale x 1 x i64> @gather_non_invariant_step_shl(ptr %a, ptr %b, i32 %le
159158
; CHECK-LABEL: @gather_non_invariant_step_shl(
160159
; CHECK-NEXT: vector.ph:
161160
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
162-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
163161
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
164162
; CHECK: vector.body:
165163
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
166-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
164+
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
167165
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
168-
; CHECK-NEXT: [[VEC_IND_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], splat (i64 42)
169-
; CHECK-NEXT: [[VEC_IND_SHL:%.*]] = shl <vscale x 1 x i64> [[VEC_IND_ADD]], splat (i64 2)
170-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND_SHL]], i32 3
171-
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP1]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
166+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
167+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
168+
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
169+
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
172170
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
173171
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
174172
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
173+
; CHECK-NEXT: [[STEP1:%.*]] = shl i64 [[STEP]], 2
175174
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
176-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[STEP]], i64 0
177-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
178-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
175+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
179176
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
180177
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
181178
; CHECK: for.cond.cleanup:
@@ -256,19 +253,17 @@ define void @scatter_non_invariant_step(ptr %a, ptr %b, i32 %len) {
256253
; CHECK-LABEL: @scatter_non_invariant_step(
257254
; CHECK-NEXT: vector.ph:
258255
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
259-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
260256
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
261257
; CHECK: vector.body:
262258
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
263-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
264-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
265-
; CHECK-NEXT: tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], i32 8, <vscale x 1 x i1> splat (i1 true))
259+
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
260+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
261+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
262+
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
266263
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
267264
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
268265
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
269-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[STEP]], i64 0
270-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
271-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
266+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
272267
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
273268
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
274269
; CHECK: for.cond.cleanup:
@@ -302,21 +297,18 @@ define void @scatter_non_invariant_step_add_shl(ptr %a, ptr %b, i32 %len) {
302297
; CHECK-LABEL: @scatter_non_invariant_step_add_shl(
303298
; CHECK-NEXT: vector.ph:
304299
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
305-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
306300
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
307301
; CHECK: vector.body:
308302
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
309-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
310-
; CHECK-NEXT: [[VEC_IND_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], splat (i64 42)
311-
; CHECK-NEXT: [[VEC_IND_SHL:%.*]] = shl <vscale x 1 x i64> [[VEC_IND_ADD]], splat (i64 2)
312-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND_SHL]], i32 3
313-
; CHECK-NEXT: tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], i32 8, <vscale x 1 x i1> splat (i1 true))
303+
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
304+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
305+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
306+
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
314307
; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
315308
; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8
309+
; CHECK-NEXT: [[STEP1:%.*]] = shl i64 [[STEP]], 2
316310
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
317-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[STEP]], i64 0
318-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
319-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
311+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
320312
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
321313
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
322314
; CHECK: for.cond.cleanup:
@@ -695,22 +687,19 @@ define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
695687
; CHECK-LABEL: @evl_gather(
696688
; CHECK-NEXT: vector.ph:
697689
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
698-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
699690
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
700691
; CHECK: vector.body:
701692
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
702-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
693+
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
703694
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
704695
; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
705696
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
706-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
707-
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
697+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
698+
; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
708699
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
709700
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
710701
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
711-
; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
712-
; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
713-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
702+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
714703
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
715704
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
716705
; CHECK: for.cond.cleanup:
@@ -752,20 +741,17 @@ define void @evl_scatter(ptr %a, i32 %len) {
752741
; CHECK-LABEL: @evl_scatter(
753742
; CHECK-NEXT: vector.ph:
754743
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
755-
; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
756744
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
757745
; CHECK: vector.body:
758746
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
759-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
747+
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
760748
; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR1]]
761749
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
762-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
763-
; CHECK-NEXT: tail call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
750+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
751+
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
764752
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
765753
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add nuw i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
766-
; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
767-
; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
768-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
754+
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
769755
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR1]], [[WIDE_TRIP_COUNT]]
770756
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
771757
; CHECK: for.cond.cleanup:

0 commit comments

Comments
 (0)