Skip to content

Commit ef391db

Browse files
authored
[LV] Drop incorrect inbounds for reverse vector pointer when folding tail (#120730)
When folding the tail, the vectorized loop may compute an address that the original scalar loop never computes, and that address may not be inbounds. Drop the inbounds flag in that case.
1 parent 7edeeab commit ef391db

File tree

3 files changed

+17
-14
lines changed

3 files changed

+17
-14
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8354,17 +8354,22 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
83548354
auto *GEP = dyn_cast<GetElementPtrInst>(
83558355
Ptr->getUnderlyingValue()->stripPointerCasts());
83568356
VPSingleDefRecipe *VectorPtr;
8357-
if (Reverse)
8357+
if (Reverse) {
8358+
// When folding the tail, we may compute an address that we don't in the
8359+
// original scalar loop and it may not be inbounds. Drop Inbounds in that
8360+
// case.
8361+
GEPNoWrapFlags Flags =
8362+
(CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
8363+
? GEPNoWrapFlags::none()
8364+
: GEPNoWrapFlags::inBounds();
83588365
VectorPtr = new VPReverseVectorPointerRecipe(
8359-
Ptr, &Plan.getVF(), getLoadStoreType(I),
8360-
GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
8361-
: GEPNoWrapFlags::none(),
8362-
I->getDebugLoc());
8363-
else
8366+
Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
8367+
} else {
83648368
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
83658369
GEP ? GEP->getNoWrapFlags()
83668370
: GEPNoWrapFlags::none(),
83678371
I->getDebugLoc());
8372+
}
83688373
Builder.getInsertBlock()->appendRecipe(VectorPtr);
83698374
Ptr = VectorPtr;
83708375
}

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,16 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
3737
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
3838
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]]
3939
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP18]]
40-
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
41-
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
40+
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
41+
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
4242
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
4343
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
4444
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
4545
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
4646
; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]]
4747
; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP19]]
48-
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
49-
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
48+
; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
49+
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
5050
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
5151
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
5252
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64

llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
55
target triple = "x86_64-unknown-linux-gnu"
66

7-
; FIXME: GEP flags on GEPs for reverse vector pointer need to be dropped when folding the tail.
8-
97
define i1 @fn(ptr %nno) #0 {
108
; CHECK-LABEL: define i1 @fn(
119
; CHECK-SAME: ptr [[NNO:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -26,8 +24,8 @@ define i1 @fn(ptr %nno) #0 {
2624
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
2725
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
2826
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[TMP22]]
29-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
30-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
27+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0
28+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3
3129
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3230
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[REVERSE]], <4 x i32> poison)
3331
; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

0 commit comments

Comments
 (0)