Skip to content

Commit 3f0acb2

Browse files
committed
[RISCV] Match strided vector bases in RISCVGatherScatterLowering
Currently we only match GEPs with a scalar base pointer, but a common pattern that's emitted from the loop vectorizer is a strided vector base plus some sort of scalar offset:

    %base = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
    %gep = getelementptr i64, <vscale x 1 x ptr> %base, i64 %offset

This is common for accesses into a struct, e.g. f[i].b below:

    struct F { int a; char b; };

    void foo(struct F *f) {
      for (int i = 0; i < 1024; i += 2) {
        f[i].a++;
        f[i].b++;
      }
    }

This patch handles this case in RISCVGatherScatterLowering by recursing on the base pointer if it's a vector.

With this we can convert roughly 80% of the indexed loads and stores emitted to strided loads and stores on SPEC CPU 2017, -O3 -march=rva22u64_v
1 parent 458a315 commit 3f0acb2

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,22 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
349349

350350
SmallVector<Value *, 2> Ops(GEP->operands());
351351

352+
// If the base pointer is a vector, check if it's strided.
353+
if (GEP->getPointerOperand()->getType()->isVectorTy()) {
354+
auto [BaseBase, Stride] = determineBaseAndStride(
355+
cast<Instruction>(GEP->getPointerOperand()), Builder);
356+
// If GEP's offset is scalar then we can add it to the base pointer's base.
357+
auto IsScalar = [](Value *Idx) { return !Idx->getType()->isVectorTy(); };
358+
if (BaseBase && all_of(GEP->indices(), IsScalar)) {
359+
Builder.SetInsertPoint(GEP);
360+
SmallVector<Value *> Indices(GEP->indices());
361+
Value *OffsetBase =
362+
Builder.CreateGEP(GEP->getSourceElementType(), BaseBase, Indices, "",
363+
GEP->isInBounds());
364+
return {OffsetBase, Stride};
365+
}
366+
}
367+
352368
// Base pointer needs to be a scalar.
353369
Value *ScalarBase = Ops[0];
354370
if (ScalarBase->getType()->isVectorTy()) {

llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,10 +301,8 @@ define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
301301

302302
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
303303
; CHECK-LABEL: @vector_base_scalar_offset(
304-
; CHECK-NEXT: [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
305-
; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]]
306-
; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]]
307-
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
304+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
305+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
308306
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
309307
;
310308
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -321,10 +319,8 @@ define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
321319

322320
define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
323321
; CHECK-LABEL: @splat_base_scalar_offset(
324-
; CHECK-NEXT: [[HEAD:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[P:%.*]], i32 0
325-
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[HEAD]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
326-
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[SPLAT]], i64 [[OFFSET:%.*]]
327-
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
322+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
323+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 0, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
328324
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
329325
;
330326
%head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0

0 commit comments

Comments
 (0)