Skip to content

Commit d8e8185

Browse files
committed
[Resolve TODO] Use i32 index type for unit stride and part 0
1 parent f40ebc6 commit d8e8185

File tree

3 files changed: +18 additions, -17 deletions

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2331,12 +2331,12 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
23312331
}
23322332
#endif
23332333

2334-
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2334+
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
23352335
unsigned CurrentPart, IRBuilderBase &Builder) {
23362336
// Use i32 for the gep index type when the value is constant,
23372337
// or query DataLayout for a more suitable index type otherwise.
23382338
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2339-
return IsScalable && (IsReverse || CurrentPart > 0)
2339+
return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
23402340
? DL.getIndexType(Builder.getPtrTy(0))
23412341
: Builder.getInt32Ty();
23422342
}
@@ -2345,7 +2345,7 @@ void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
23452345
auto &Builder = State.Builder;
23462346
unsigned CurrentPart = getUnrollPart(*this);
23472347
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2348-
CurrentPart, Builder);
2348+
/*IsUnitStride*/ true, CurrentPart, Builder);
23492349

23502350
// The wide store needs to start at the last vector element.
23512351
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -2380,18 +2380,19 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
23802380
auto &Builder = State.Builder;
23812381
unsigned CurrentPart = getUnrollPart(*this);
23822382
Value *Stride = State.get(getStride(), /*IsScalar*/ true);
2383-
bool IsStrideOne =
2384-
isa<ConstantInt>(Stride) && cast<ConstantInt>(Stride)->isOne();
2385-
// TODO: can use i32 index type if stride is minus one and the part is zero
2386-
// part.
2387-
Type *IndexTy = IsStrideOne
2388-
? getGEPIndexTy(State.VF.isScalable(),
2389-
/*IsReverse*/ false, CurrentPart, Builder)
2390-
: Stride->getType();
2383+
2384+
auto *StrideC = dyn_cast<ConstantInt>(Stride);
2385+
bool IsStrideOne = StrideC && StrideC->isOne();
2386+
bool IsUnitStride = IsStrideOne || (StrideC && StrideC->isMinusOne());
2387+
Type *IndexTy =
2388+
getGEPIndexTy(State.VF.isScalable(),
2389+
/*IsReverse*/ false, IsUnitStride, CurrentPart, Builder);
23912390
Value *Ptr = State.get(getOperand(0), VPLane(0));
23922391

2392+
Stride = Builder.CreateSExtOrTrunc(Stride, IndexTy);
23932393
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
23942394
Value *Index = IsStrideOne ? Increment : Builder.CreateMul(Increment, Stride);
2395+
23952396
Value *ResultPtr =
23962397
Builder.CreateGEP(IndexedTy, Ptr, Index, "", getGEPNoWrapFlags());
23972398

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
3939
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
4040
; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
4141
; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]]
42-
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
42+
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
4343
; RV64-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP5]] to i32
4444
; RV64-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP10]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
4545
; RV64-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_STRIDED_LOAD]], splat (i32 1)
@@ -149,7 +149,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
149149
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
150150
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
151151
; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]]
152-
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
152+
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
153153
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
154154
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
155155
; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1
@@ -235,7 +235,7 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
235235
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
236236
; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
237237
; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
238-
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 0
238+
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
239239
; RV64-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP5]] to i32
240240
; RV64-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i64(ptr align 4 [[TMP10]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
241241
; RV64-NEXT: [[TMP12:%.*]] = fadd <vscale x 4 x float> [[WIDE_STRIDED_LOAD]], splat (float 1.000000e+00)
@@ -345,7 +345,7 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
345345
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
346346
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
347347
; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
348-
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 0
348+
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
349349
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
350350
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
351351
; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
3131
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]]
3232
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -1
3333
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP6]]
34-
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
34+
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
3535
; IF-EVL-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP8]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
3636
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP6]]
3737
; IF-EVL-NEXT: [[TMP10:%.*]] = zext i32 [[TMP5]] to i64
@@ -129,7 +129,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
129129
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
130130
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
131131
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i64 [[TMP6]]
132-
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0
132+
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
133133
; IF-EVL-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP11]], i64 -4, <vscale x 4 x i1> [[TMP9]], i32 [[TMP5]])
134134
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP6]]
135135
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP5]] to i64

0 commit comments

Comments (0)