Skip to content

[VPlan] Propagate all GEP flags #119899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,13 @@ class VPBuilder {

VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}

VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8406,10 +8406,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Reverse)
VectorPtr = new VPReverseVectorPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
: GEPNoWrapFlags::none(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kept the previous inbounds-only logic here because propagating nuw for reverse iteration (using negative offsets) seems problematic.

I->getDebugLoc());
else
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->isInBounds() : false,
GEP ? GEP->getNoWrapFlags()
: GEPNoWrapFlags::none(),
I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
Expand Down
44 changes: 16 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -952,11 +952,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};

struct GEPFlagsTy {
char IsInBounds : 1;
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
};

private:
struct ExactFlagsTy {
char IsExact : 1;
Expand All @@ -983,7 +978,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
WrapFlagsTy WrapFlags;
DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
GEPFlagsTy GEPFlags;
GEPNoWrapFlags GEPFlags;
NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
unsigned AllFlags;
Expand Down Expand Up @@ -1020,7 +1015,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = Op->isExact();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
GEPFlags.IsInBounds = GEP->isInBounds();
GEPFlags = GEP->getNoWrapFlags();
} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
OpType = OperationType::NonNegOp;
NonNegFlags.NonNeg = PNNI->hasNonNeg();
Expand Down Expand Up @@ -1060,7 +1055,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
GEPFlagsTy GEPFlags, DebugLoc DL = {})
GEPNoWrapFlags GEPFlags, DebugLoc DL = {})
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
GEPFlags(GEPFlags) {}

Expand Down Expand Up @@ -1097,7 +1092,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = false;
break;
case OperationType::GEPOp:
GEPFlags.IsInBounds = false;
GEPFlags = GEPNoWrapFlags::none();
break;
case OperationType::FPMathOp:
FMFs.NoNaNs = false;
Expand Down Expand Up @@ -1126,10 +1121,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
I->setIsExact(ExactFlags.IsExact);
break;
case OperationType::GEPOp:
// TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
cast<GetElementPtrInst>(I)->setNoWrapFlags(
GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
: GEPNoWrapFlags::none());
cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
break;
case OperationType::FPMathOp:
I->setHasAllowReassoc(FMFs.AllowReassoc);
Expand All @@ -1155,11 +1147,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return CmpPredicate;
}

bool isInBounds() const {
assert(OpType == OperationType::GEPOp &&
"recipe doesn't have inbounds flag");
return GEPFlags.IsInBounds;
}
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
Expand Down Expand Up @@ -1306,7 +1294,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}

VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags,
DebugLoc DL = {}, const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC,
ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
Expand Down Expand Up @@ -1914,10 +1902,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,

public:
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
bool IsInBounds, DebugLoc DL)
GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
ArrayRef<VPValue *>({Ptr, VF}),
GEPFlagsTy(IsInBounds), DL),
ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
Expand Down Expand Up @@ -1949,8 +1936,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPReverseVectorPointerRecipe *clone() override {
return new VPReverseVectorPointerRecipe(
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
return new VPReverseVectorPointerRecipe(getOperand(0), getVFValue(),
IndexedTy, getGEPNoWrapFlags(),
getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand All @@ -1966,10 +1954,10 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
Type *IndexedTy;

public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
GEPFlags, DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
Expand All @@ -1991,8 +1979,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
getDebugLoc());
return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
getGEPNoWrapFlags(), getDebugLoc());
}

/// Return the cost of this VPHeaderPHIRecipe.
Expand Down
34 changes: 19 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Addend = State.get(getOperand(1), VPLane(0));
return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
: Builder.CreatePtrAdd(Ptr, Addend, Name);
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
case VPInstruction::ResumePhi: {
Value *IncomingFromVPlanPred =
Expand Down Expand Up @@ -1276,8 +1275,12 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
getFastMathFlags().print(O);
break;
case OperationType::GEPOp:
if (GEPFlags.IsInBounds)
if (GEPFlags.isInBounds())
O << " inbounds";
else if (GEPFlags.hasNoUnsignedSignedWrap())
O << " nusw";
if (GEPFlags.hasNoUnsignedWrap())
O << " nuw";
break;
case OperationType::NonNegOp:
if (NonNegFlags.NonNeg)
Expand Down Expand Up @@ -1906,9 +1909,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned I = 0, E = getNumOperands(); I != E; I++)
Ops.push_back(State.get(getOperand(I), VPLane(0)));

auto *NewGEP =
State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
ArrayRef(Ops).drop_front(), "", isInBounds());
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
ArrayRef(Ops).drop_front(), "",
getGEPNoWrapFlags());
Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
State.set(this, Splat);
State.addMetadata(Splat, GEP);
Expand All @@ -1934,7 +1937,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
Indices, "", isInBounds());
Indices, "", getGEPNoWrapFlags());
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP);
Expand Down Expand Up @@ -1985,9 +1988,10 @@ void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
Value *ResultPtr =
Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
getGEPNoWrapFlags());

State.set(this, ResultPtr, /*IsScalar*/ true);
}
Expand All @@ -1997,9 +2001,9 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << " = reverse-vector-pointer ";
if (isInBounds())
O << "inbounds ";
O << " = reverse-vector-pointer";
printFlags(O);
O << " ";
printOperands(O, SlotTracker);
}
#endif
Expand All @@ -2011,10 +2015,10 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
CurrentPart, Builder);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();

Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
Value *ResultPtr =
Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());

State.set(this, ResultPtr, /*IsScalar*/ true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS1-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1
; CHECK-VS1-NEXT: [[TMP24:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS1-NEXT: store <vscale x 16 x i8> [[TMP24]], ptr [[TMP23]], align 1
Expand Down Expand Up @@ -115,8 +115,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS1-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x i8>, ptr [[TMP34]], align 1
; CHECK-VS1-NEXT: [[TMP35:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS1-NEXT: store <vscale x 8 x i8> [[TMP35]], ptr [[TMP34]], align 1
Expand Down Expand Up @@ -189,8 +189,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS2-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP23]], align 1
; CHECK-VS2-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS2-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP23]], align 1
Expand Down Expand Up @@ -223,8 +223,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS2-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP34]], align 1
; CHECK-VS2-NEXT: [[TMP35:%.*]] = add <vscale x 4 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS2-NEXT: store <vscale x 4 x i8> [[TMP35]], ptr [[TMP34]], align 1
Expand Down Expand Up @@ -279,7 +279,7 @@ while.end:

define void @trip_count_too_small(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) {
; CHECK-LABEL: define void @trip_count_too_small(
; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TC]], 3
; CHECK-NEXT: br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
Expand Down Expand Up @@ -440,8 +440,8 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
Expand Down
Loading
Loading