Skip to content

Commit 18ec330

Browse files
committed
[VPlan] Manage InBounds via VPRecipeWithIRFlags for VectorPtrRecipe.
As suggested as a follow-up in #72164, manage inbounds via VPRecipeWithIRFlags. Note that we can now preserve inbounds in a few more cases.
1 parent 249d2cc commit 18ec330

File tree

5 files changed

+42
-26
lines changed

5 files changed

+42
-26
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8077,8 +8077,11 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
80778077

80788078
VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
80798079
if (Consecutive) {
8080-
auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
8081-
Reverse, I->getDebugLoc());
8080+
auto *GEP = dyn_cast<GetElementPtrInst>(
8081+
Ptr->getUnderlyingValue()->stripPointerCasts());
8082+
auto *VectorPtr = new VPVectorPointerRecipe(
8083+
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
8084+
I->getDebugLoc());
80828085
Builder.getInsertBlock()->appendRecipe(VectorPtr);
80838086
Ptr = VectorPtr;
80848087
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -842,16 +842,19 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
842842
WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
843843
};
844844

845+
protected:
846+
struct GEPFlagsTy {
847+
char IsInBounds : 1;
848+
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
849+
};
850+
845851
private:
846852
struct DisjointFlagsTy {
847853
char IsDisjoint : 1;
848854
};
849855
struct ExactFlagsTy {
850856
char IsExact : 1;
851857
};
852-
struct GEPFlagsTy {
853-
char IsInBounds : 1;
854-
};
855858
struct NonNegFlagsTy {
856859
char NonNeg : 1;
857860
};
@@ -933,12 +936,21 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
933936
: VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp),
934937
FMFs(FMFs) {}
935938

939+
protected:
940+
template <typename IterT>
941+
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
942+
GEPFlagsTy GEPFlags, DebugLoc DL = {})
943+
: VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp),
944+
GEPFlags(GEPFlags) {}
945+
946+
public:
936947
static inline bool classof(const VPRecipeBase *R) {
937948
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
938949
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
939950
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
940951
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
941-
R->getVPDefID() == VPRecipeBase::VPReplicateSC;
952+
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
953+
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942954
}
943955

944956
/// Drop all poison-generating flags.
@@ -1361,15 +1373,16 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
13611373
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
13621374
/// all parts. If IsReverse is true, compute pointers for accessing the input in
13631375
/// reverse order per part.
1364-
class VPVectorPointerRecipe : public VPRecipeBase, public VPValue {
1376+
class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue {
13651377
Type *IndexedTy;
13661378
bool IsReverse;
13671379

13681380
public:
13691381
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1370-
DebugLoc DL)
1371-
: VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this),
1372-
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1382+
bool IsInBounds, DebugLoc DL)
1383+
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1384+
GEPFlagsTy(IsInBounds), DL),
1385+
VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {}
13731386

13741387
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
13751388

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,9 +1308,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
13081308
? DL.getIndexType(IndexedTy->getPointerTo())
13091309
: Builder.getInt32Ty();
13101310
Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1311-
bool InBounds = false;
1312-
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
1313-
InBounds = GEP->isInBounds();
1311+
bool InBounds = isInBounds();
13141312
if (IsReverse) {
13151313
// If the address is consecutive but reversed, then the
13161314
// wide store needs to start at the last vector element.

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -584,10 +584,11 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
584584
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
585585
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
586586
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float 3.300000e+01, float 3.300000e+01, float 3.300000e+01, float 3.300000e+01>, <4 x float> [[TMP6]]
587-
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
587+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
588+
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
588589
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
589-
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
590-
; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body
590+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
591+
; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body
591592

592593
entry:
593594
br label %loop.body

llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -150,29 +150,30 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
150150
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
151151
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
152152
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
153-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
154-
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4
153+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
154+
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
155155
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
156156
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
157157
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
158158
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
159-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
160-
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7
159+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
160+
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META7]]
161161
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
162162
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT]]
163163
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
164164
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4
165-
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
166-
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
165+
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
166+
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
167167
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD10]]
168168
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
169169
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD11]]
170170
; CHECK-NEXT: [[PREDPHI12:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]]
171-
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
172-
; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
171+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4
172+
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
173+
; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
173174
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
174-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
175-
; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
175+
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
176+
; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
176177
; CHECK: loop.body:
177178
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
178179
; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]

0 commit comments

Comments
 (0)