Skip to content

[LV][EVL] Support fixed-order recurrence idiom with EVL tail folding. #124093

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
case Intrinsic::experimental_vp_splice:
return ScalarOpdIdx == 2 || ScalarOpdIdx == 4 || ScalarOpdIdx == 5;
default:
return false;
}
Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1454,12 +1454,9 @@ class LoopVectorizationCostModel {
// Override forced styles if needed.
// FIXME: use actual opcode/data type for analysis here.
// FIXME: Investigate opportunity for fixed vector factor.
// FIXME: support fixed-order recurrences by fixing splice of non VFxUF
// penultimate EVL.
bool EVLIsLegal = UserIC <= 1 && IsScalableVF &&
TTI.hasActiveVectorLength(0, nullptr, Align()) &&
!EnableVPlanNativePath &&
Legal->getFixedOrderRecurrences().empty();
!EnableVPlanNativePath;
if (!EVLIsLegal) {
// If for some reason EVL mode is unsupported, fallback to
// DataWithoutLaneMask to try to vectorize the loop with folded tail
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPScalarCastSC:
case VPRecipeBase::VPScalarPHISC:
case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
Expand Down
45 changes: 42 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1659,10 +1659,13 @@ void VPlanTransforms::addActiveLaneMask(
/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
/// \p EVL The explicit vector length parameter of vector-predication
/// intrinsics.
/// \p PrevEVL The explicit vector length of the previous iteration. Only
/// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice.
static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPRecipeBase &CurRecipe,
VPTypeAnalysis &TypeInfo,
VPValue &AllOneMask, VPValue &EVL) {
VPValue &AllOneMask, VPValue &EVL,
VPValue *PrevEVL) {
using namespace llvm::VPlanPatternMatch;
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
assert(OrigMask && "Unmasked recipe when folding tail");
Expand Down Expand Up @@ -1690,6 +1693,18 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
Sel->getDebugLoc());
})
.Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
assert(PrevEVL && "Fixed-order recurrences require previous EVL");
VPValue *MinusOneVPV = VPI->getParent()->getPlan()->getOrAddLiveIn(
ConstantInt::getSigned(Type::getInt32Ty(TypeInfo.getContext()),
-1));
SmallVector<VPValue *> Ops(VPI->operands());
Ops.append({MinusOneVPV, &AllOneMask, PrevEVL, &EVL});
return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice,
Ops, TypeInfo.inferScalarType(VPI),
VPI->getDebugLoc());
}

VPValue *LHS, *RHS;
// Transform select with a header mask condition
// select(header_mask, LHS, RHS)
Expand All @@ -1713,6 +1728,30 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(CanonicalIVType);
LLVMContext &Ctx = CanonicalIVType->getContext();
VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();

// Create a scalar phi to track the previous EVL if fixed-order recurrence is
// contained.
VPScalarPHIRecipe *PrevEVL = nullptr;
bool ContainsFORs =
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
if (ContainsFORs) {
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
VPValue *MaxEVL = &Plan.getVF();
// Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
if (unsigned VFSize =
TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits();
VFSize != 32) {
MaxEVL = new VPScalarCastRecipe(
VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL,
Type::getInt32Ty(Ctx), DebugLoc());
VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB();
Preheader->appendRecipe(cast<VPScalarCastRecipe>(MaxEVL));
}
PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl");
PrevEVL->insertBefore(*Header, Header->getFirstNonPhi());
}

for (VPUser *U : to_vector(Plan.getVF().users())) {
if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
Expand All @@ -1724,8 +1763,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
createEVLRecipe(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
VPRecipeBase *EVLRecipe = createEVLRecipe(
HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL);
if (!EVLRecipe)
continue;

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
})
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe>(
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe,
VPScalarPHIRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
Expand Down
Loading