Skip to content

Commit ed4f781

Browse files
committed
[Resolve TODO] Use i32 index type for unit stride and part 0
1 parent 746caae commit ed4f781

File tree

3 files changed

+18
-17
lines changed

3 files changed

+18
-17
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2323,12 +2323,12 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
23232323
}
23242324
#endif
23252325

2326-
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2326+
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
23272327
unsigned CurrentPart, IRBuilderBase &Builder) {
23282328
// Use i32 for the gep index type when the value is constant,
23292329
// or query DataLayout for a more suitable index type otherwise.
23302330
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2331-
return IsScalable && (IsReverse || CurrentPart > 0)
2331+
return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
23322332
? DL.getIndexType(Builder.getPtrTy(0))
23332333
: Builder.getInt32Ty();
23342334
}
@@ -2337,7 +2337,7 @@ void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
23372337
auto &Builder = State.Builder;
23382338
unsigned CurrentPart = getUnrollPart(*this);
23392339
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2340-
CurrentPart, Builder);
2340+
/*IsUnitStride*/ true, CurrentPart, Builder);
23412341

23422342
// The wide store needs to start at the last vector element.
23432343
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -2372,18 +2372,19 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
23722372
auto &Builder = State.Builder;
23732373
unsigned CurrentPart = getUnrollPart(*this);
23742374
Value *Stride = State.get(getStride(), /*IsScalar*/ true);
2375-
bool IsStrideOne =
2376-
isa<ConstantInt>(Stride) && cast<ConstantInt>(Stride)->isOne();
2377-
// TODO: can use i32 index type if stride is minus one and the part is zero
2378-
// part.
2379-
Type *IndexTy = IsStrideOne
2380-
? getGEPIndexTy(State.VF.isScalable(),
2381-
/*IsReverse*/ false, CurrentPart, Builder)
2382-
: Stride->getType();
2375+
2376+
auto *StrideC = dyn_cast<ConstantInt>(Stride);
2377+
bool IsStrideOne = StrideC && StrideC->isOne();
2378+
bool IsUnitStride = IsStrideOne || (StrideC && StrideC->isMinusOne());
2379+
Type *IndexTy =
2380+
getGEPIndexTy(State.VF.isScalable(),
2381+
/*IsReverse*/ false, IsUnitStride, CurrentPart, Builder);
23832382
Value *Ptr = State.get(getOperand(0), VPLane(0));
23842383

2384+
Stride = Builder.CreateSExtOrTrunc(Stride, IndexTy);
23852385
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
23862386
Value *Index = IsStrideOne ? Increment : Builder.CreateMul(Increment, Stride);
2387+
23872388
Value *ResultPtr =
23882389
Builder.CreateGEP(IndexedTy, Ptr, Index, "", getGEPNoWrapFlags());
23892390

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
3939
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
4040
; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
4141
; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]]
42-
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
42+
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
4343
; RV64-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP5]] to i32
4444
; RV64-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP10]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
4545
; RV64-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_STRIDED_LOAD]], splat (i32 1)
@@ -149,7 +149,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
149149
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
150150
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
151151
; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]]
152-
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
152+
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
153153
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
154154
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
155155
; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1
@@ -235,7 +235,7 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
235235
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
236236
; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
237237
; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
238-
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 0
238+
; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
239239
; RV64-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP5]] to i32
240240
; RV64-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i64(ptr align 4 [[TMP10]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
241241
; RV64-NEXT: [[TMP12:%.*]] = fadd <vscale x 4 x float> [[WIDE_STRIDED_LOAD]], splat (float 1.000000e+00)
@@ -345,7 +345,7 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
345345
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
346346
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
347347
; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
348-
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 0
348+
; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
349349
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
350350
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
351351
; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], -1

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
3131
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]]
3232
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -1
3333
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP6]]
34-
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
34+
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
3535
; IF-EVL-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP8]], i64 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
3636
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP6]]
3737
; IF-EVL-NEXT: [[TMP10:%.*]] = zext i32 [[TMP5]] to i64
@@ -129,7 +129,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
129129
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
130130
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
131131
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i64 [[TMP6]]
132-
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0
132+
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
133133
; IF-EVL-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 [[TMP11]], i64 -4, <vscale x 4 x i1> [[TMP9]], i32 [[TMP5]])
134134
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP6]]
135135
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP5]] to i64

0 commit comments

Comments (0)