[VPlan] Propagate all GEP flags #119899
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes: Store GEPNoWrapFlags instead of only InBounds and propagate them.

Patch is 67.80 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/119899.diff

23 Files Affected:
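For context on the flag class the patch switches to: GEPNoWrapFlags packs the three GEP no-wrap properties (inbounds, nusw, nuw), where inbounds implies nusw, which the previous single IsInBounds bit could not represent. Below is a minimal standalone sketch (illustrative, not part of the patch; the function name is made up) exercising only the accessors that appear in the diff:

#include "llvm/IR/GEPNoWrapFlags.h"
#include <cassert>
using llvm::GEPNoWrapFlags;

// Hypothetical helper, only to demonstrate the API used in this patch.
void flagsSketch() {
  GEPNoWrapFlags None = GEPNoWrapFlags::none();    // plain GEP, may wrap
  GEPNoWrapFlags IB = GEPNoWrapFlags::inBounds();  // inbounds, implies nusw
  assert(!None.isInBounds());
  assert(IB.isInBounds() && IB.hasNoUnsignedSignedWrap());
  assert(!IB.hasNoUnsignedWrap());                 // nuw is tracked separately
}

This is why the printFlags() change below prints "inbounds" or else "nusw" (inbounds subsumes nusw), and prints "nuw" independently.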
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index fbcf181a45a664..650a4859780da2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -222,13 +222,13 @@ class VPBuilder {
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
- return tryInsertInstruction(new VPInstruction(
- Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
+ return tryInsertInstruction(
+ new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
- return tryInsertInstruction(new VPInstruction(
- Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
+ return tryInsertInstruction(
+ new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 122dc1db0b59dc..d839b734086a2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8407,10 +8407,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Reverse)
VectorPtr = new VPReverseVectorPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
- GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+ GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
+ : GEPNoWrapFlags::none(),
+ I->getDebugLoc());
else
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
- GEP ? GEP->isInBounds() : false,
+ GEP ? GEP->getNoWrapFlags()
+ : GEPNoWrapFlags::none(),
I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ae68e1fc63a139..6722239d8d75a4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -952,11 +952,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};
- struct GEPFlagsTy {
- char IsInBounds : 1;
- GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
- };
-
private:
struct ExactFlagsTy {
char IsExact : 1;
@@ -983,7 +978,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
WrapFlagsTy WrapFlags;
DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
- GEPFlagsTy GEPFlags;
+ GEPNoWrapFlags GEPFlags;
NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
unsigned AllFlags;
@@ -1020,7 +1015,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = Op->isExact();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
- GEPFlags.IsInBounds = GEP->isInBounds();
+ GEPFlags = GEP->getNoWrapFlags();
} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
OpType = OperationType::NonNegOp;
NonNegFlags.NonNeg = PNNI->hasNonNeg();
@@ -1060,7 +1055,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- GEPFlagsTy GEPFlags, DebugLoc DL = {})
+ GEPNoWrapFlags GEPFlags, DebugLoc DL = {})
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
GEPFlags(GEPFlags) {}
@@ -1097,7 +1092,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = false;
break;
case OperationType::GEPOp:
- GEPFlags.IsInBounds = false;
+ GEPFlags = GEPNoWrapFlags::none();
break;
case OperationType::FPMathOp:
FMFs.NoNaNs = false;
@@ -1126,10 +1121,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
I->setIsExact(ExactFlags.IsExact);
break;
case OperationType::GEPOp:
- // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
- cast<GetElementPtrInst>(I)->setNoWrapFlags(
- GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
- : GEPNoWrapFlags::none());
+ cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
break;
case OperationType::FPMathOp:
I->setHasAllowReassoc(FMFs.AllowReassoc);
@@ -1155,11 +1147,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return CmpPredicate;
}
- bool isInBounds() const {
- assert(OpType == OperationType::GEPOp &&
- "recipe doesn't have inbounds flag");
- return GEPFlags.IsInBounds;
- }
+ GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
@@ -1306,7 +1294,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}
- VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
+ VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags,
DebugLoc DL = {}, const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC,
ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
@@ -1922,10 +1910,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
public:
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
- bool IsInBounds, DebugLoc DL)
+ GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
- ArrayRef<VPValue *>({Ptr, VF}),
- GEPFlagsTy(IsInBounds), DL),
+ ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
@@ -1957,8 +1944,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPReverseVectorPointerRecipe *clone() override {
- return new VPReverseVectorPointerRecipe(
- getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+ return new VPReverseVectorPointerRecipe(getOperand(0), getVFValue(),
+ IndexedTy, getGEPNoWrapFlags(),
+ getDebugLoc());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1974,10 +1962,10 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
Type *IndexedTy;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
- GEPFlagsTy(IsInBounds), DL),
+ GEPFlags, DL),
IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1999,8 +1987,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
- getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
+ getGEPNoWrapFlags(), getDebugLoc());
}
/// Return the cost of this VPHeaderPHIRecipe.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02774d8e5c5fef..1ad2edfadfb013 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -621,8 +621,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Addend = State.get(getOperand(1), VPLane(0));
- return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
- : Builder.CreatePtrAdd(Ptr, Addend, Name);
+ return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
case VPInstruction::ResumePhi: {
Value *IncomingFromVPlanPred =
@@ -1276,8 +1275,12 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
getFastMathFlags().print(O);
break;
case OperationType::GEPOp:
- if (GEPFlags.IsInBounds)
+ if (GEPFlags.isInBounds())
O << " inbounds";
+ else if (GEPFlags.hasNoUnsignedSignedWrap())
+ O << " nusw";
+ if (GEPFlags.hasNoUnsignedWrap())
+ O << " nuw";
break;
case OperationType::NonNegOp:
if (NonNegFlags.NonNeg)
@@ -1918,9 +1921,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned I = 0, E = getNumOperands(); I != E; I++)
Ops.push_back(State.get(getOperand(I), VPLane(0)));
- auto *NewGEP =
- State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
- ArrayRef(Ops).drop_front(), "", isInBounds());
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
+ ArrayRef(Ops).drop_front(), "",
+ getGEPNoWrapFlags());
Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
State.set(this, Splat);
State.addMetadata(Splat, GEP);
@@ -1946,7 +1949,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", isInBounds());
+ Indices, "", getGEPNoWrapFlags());
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP);
@@ -1997,9 +2000,10 @@ void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
- bool InBounds = isInBounds();
- Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+ Value *ResultPtr =
+ Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
+ getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -2009,9 +2013,9 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
- O << " = reverse-vector-pointer ";
- if (isInBounds())
- O << "inbounds ";
+ O << " = reverse-vector-pointer";
+ printFlags(O);
+ O << " ";
printOperands(O, SlotTracker);
}
#endif
@@ -2023,10 +2027,10 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
CurrentPart, Builder);
Value *Ptr = State.get(getOperand(0), VPLane(0));
- bool InBounds = isInBounds();
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
+ Value *ResultPtr =
+ Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index a02323ab2108f9..7a7eb71ea2f23c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -81,8 +81,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS1-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
-; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
-; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
+; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
+; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1
; CHECK-VS1-NEXT: [[TMP24:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS1-NEXT: store <vscale x 16 x i8> [[TMP24]], ptr [[TMP23]], align 1
@@ -115,8 +115,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS1-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
-; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
+; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
+; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x i8>, ptr [[TMP34]], align 1
; CHECK-VS1-NEXT: [[TMP35:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS1-NEXT: store <vscale x 8 x i8> [[TMP35]], ptr [[TMP34]], align 1
@@ -189,8 +189,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS2-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
-; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
-; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
+; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
+; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP23]], align 1
; CHECK-VS2-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS2-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP23]], align 1
@@ -223,8 +223,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS2-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
-; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
+; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
+; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP34]], align 1
; CHECK-VS2-NEXT: [[TMP35:%.*]] = add <vscale x 4 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS2-NEXT: store <vscale x 4 x i8> [[TMP35]], ptr [[TMP34]], align 1
@@ -279,7 +279,7 @@ while.end:
define void @trip_count_too_small(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) {
; CHECK-LABEL: define void @trip_count_too_small(
-; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TC]], 3
; CHECK-NEXT: br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
@@ -440,8 +440,8 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll
index fb5ff66989a679..7d71ff87ec8be2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll
@@ -14,36 +14,36 @@ target triple = "aarch64-unknown-linux-gnu"
define void @load_ext_trunc_store(ptr readonly %in, ptr noalias %out, i64 %N) {
; CHECK-LABEL: define void @load_ext_trunc_store(
; CHECK-SAME: ptr readonly [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[IN]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw double, ptr [[IN]], i64 [[TMP0]]
...
[truncated]
@@ -8407,10 +8407,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     if (Reverse)
       VectorPtr = new VPReverseVectorPointerRecipe(
           Ptr, &Plan.getVF(), getLoadStoreType(I),
-          GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+          GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
+                                   : GEPNoWrapFlags::none(),
I kept the previous inbounds-only logic here because propagating nuw for reverse iteration (using negative offsets) seems problematic.
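To illustrate the concern (a sketch with hypothetical numbers, not from the patch): the reverse recipe steps backwards through memory with indices such as 1 - RunTimeVF, and a negative index reinterpreted as unsigned is a huge value, so the unsigned pointer addition wraps; tagging such a GEP nuw would therefore be a miscompile, while inbounds/nusw reason about the signed offset and can still hold.

#include <cstdint>
#include <cstdio>

int main() {
  int64_t RunTimeVF = 16;            // hypothetical runtime vectorization factor
  int64_t LastLane = 1 - RunTimeVF;  // -15: the recipe's backward step
  // As an unsigned quantity this offset is 2^64 - 15, so a GEP carrying
  // nuw would assert the address computation cannot wrap -- false here.
  printf("%llu\n", (unsigned long long)LastLane); // 18446744073709551601
}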
LGTM, thanks!
Could you also add a variant with just nuw/nsw flags to llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll?
Store GEPNoWrapFlags instead of only InBounds and propagate them.