
[VPlan] Unroll by VF with Pack/Unpack. #145188

Draft: wants to merge 5 commits into base: main

1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7328,6 +7328,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
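
For context on the new pipeline step: replicateByVF runs right after unrollByUF and expands recipes that produce one scalar per lane into VF scalar copies, whose results are then packed back into a vector value via the new explicit Pack/BuildVector instructions. A minimal standalone sketch of that idea (illustrative C++ with an assumed VF of 4 and an arbitrary scalar operation, not the VPlan API):

```cpp
#include <array>
#include <cstdio>

constexpr unsigned VF = 4; // illustrative fixed vectorization factor

// Stand-in for a replicated scalar "ingredient" that must run once per lane.
static int scalarOp(int X) { return X * X + 1; }

// Replicate the scalar op VF times and pack the per-lane results; packing is
// what an explicit BuildVector/Pack expresses once replicateByVF has run.
static std::array<int, VF> replicateAndPack(const std::array<int, VF> &In) {
  std::array<int, VF> Packed{};
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Packed[Lane] = scalarOp(In[Lane]); // one scalar clone per lane
  return Packed;
}

int main() {
  std::array<int, VF> Out = replicateAndPack({1, 2, 3, 4});
  for (int V : Out)
    std::printf("%d ", V); // prints: 2 5 10 17
  std::printf("\n");
  return 0;
}
```
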
21 changes: 10 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -261,6 +261,13 @@ Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
return Data.VPV2Scalars[Def][0];
}

// Look through BuildVector to avoid redundant extracts.
// TODO: Remove once replicate regions are unrolled explicitly.
if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
auto *BuildVector = cast<VPInstruction>(Def);
return get(BuildVector->getOperand(Lane.getKnownLane()), true);
}

assert(hasVectorValue(Def));
auto *VecPart = Data.VPV2Vector[Def];
if (!VecPart->getType()->isVectorTy()) {
@@ -360,17 +367,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
// resulting vectors are stored in State, we will only generate the
// insertelements once.
Value *VectorValue = nullptr;
if (IsSingleScalar) {
VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
set(Def, VectorValue);
}
assert(IsSingleScalar && "replicates must be packed explicitly");
VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
Builder.restoreIP(OldIP);
return VectorValue;
}
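
Two aspects of these hunks, sketched below in standalone form (illustrative C++, not the VPTransformState API): a lane request on a value that was just packed from scalars can be answered with the corresponding scalar operand instead of an extract (the patch limits this to the first lane), and the remaining vector path in get() now only broadcasts single scalars, since per-lane packing is expressed explicitly in the VPlan.

```cpp
#include <array>
#include <cassert>

constexpr unsigned VF = 4; // illustrative fixed vectorization factor

// Stand-in for a value that was assembled from per-lane scalars (BuildVector).
struct PackedDef {
  std::array<int, VF> Operands; // the scalars that were packed, one per lane
};

// Model of the look-through: a request for lane L of a packed definition can
// return the L-th scalar operand rather than extracting from the packed vector.
static int getLane(const PackedDef &Def, unsigned Lane) {
  assert(Lane < VF && "lane out of range");
  return Def.Operands[Lane];
}

// Model of the remaining vector path in get(): a single (uniform) scalar is
// simply broadcast to all lanes; non-uniform packing is no longer synthesized
// here because it is expressed explicitly in the VPlan.
static std::array<int, VF> broadcast(int UniformScalar) {
  std::array<int, VF> V;
  V.fill(UniformScalar);
  return V;
}

int main() {
  PackedDef Def{{10, 20, 30, 40}};
  assert(getLane(Def, 0) == 10); // first-lane request, no extract needed
  assert(broadcast(7)[3] == 7);
  return 0;
}
```
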
26 changes: 18 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -936,6 +936,13 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
/// Given operands of (the same) struct type, creates a struct of fixed-
/// width vectors each containing a struct field of all operands. The
/// number of operands matches the element count of every vector.
BuildStructVector,
/// Creates a fixed-width vector containing all operands. The number of
/// operands matches the vector element count.
BuildVector,
ComputeAnyOfResult,
ComputeFindLastIVResult,
ComputeReductionResult,
@@ -970,6 +977,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,

Pack,
Unpack,

};

private:
@@ -979,14 +989,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
/// values per all lanes, stemming from an original ingredient. This method
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;
@@ -1080,6 +1082,14 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// result is also a single scalar.
bool isSingleScalar() const;

/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
/// values per all lanes, stemming from an original ingredient. This method
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;

/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }
};
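
The semantics documented for the two new opcodes in this file, shown as a standalone sketch (illustrative C++ with an assumed two-field struct, not LLVM types): BuildVector turns its scalar operands into the lanes of one fixed-width vector, and BuildStructVector turns operands of a common struct type into a struct of fixed-width vectors, one vector per field.

```cpp
#include <array>
#include <cassert>

constexpr unsigned VF = 4; // illustrative fixed vectorization factor

// BuildVector: the scalar operands become the lanes of one fixed-width vector.
// Four operands here to match VF == 4.
static std::array<int, VF> buildVector(int Op0, int Op1, int Op2, int Op3) {
  return {Op0, Op1, Op2, Op3}; // lane i holds operand i
}

// A simple two-field struct standing in for "operands of (the same) struct type".
struct Pair {
  int A;
  float B;
};

// The struct-of-vectors result: one fixed-width vector per struct field.
struct PairOfVectors {
  std::array<int, VF> A;
  std::array<float, VF> B;
};

// BuildStructVector: VF struct operands become a struct of fixed-width vectors,
// where lane i of each field's vector comes from operand i.
static PairOfVectors buildStructVector(const std::array<Pair, VF> &Ops) {
  PairOfVectors Result{};
  for (unsigned Lane = 0; Lane < VF; ++Lane) {
    Result.A[Lane] = Ops[Lane].A;
    Result.B[Lane] = Ops[Lane].B;
  }
  return Result;
}

int main() {
  assert(buildVector(1, 2, 3, 4)[2] == 3);
  PairOfVectors SoV =
      buildStructVector({{{1, 1.0f}, {2, 2.0f}, {3, 3.0f}, {4, 4.0f}}});
  assert(SoV.A[3] == 4 && SoV.B[3] == 4.0f);
  return 0;
}
```
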
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -100,6 +100,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::ExplicitVectorLength:
return Type::getIntNTy(Ctx, 32);
case Instruction::PHI:
case VPInstruction::Pack:
case VPInstruction::Unpack:
// Infer the type of first operand only, as other operands of header phi's
// may lead to infinite recursion.
return inferScalarType(R->getOperand(0));
@@ -108,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
return SetResultTyFromOp();
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
@@ -440,6 +444,10 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
if (!VPBB->getParent())
break;
for (VPRecipeBase &R : *VPBB) {
if (isa<VPInstruction>(&R) &&
(cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Pack ||
cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Unpack))
continue;
Idx2Recipe.push_back(&R);

// Save the end location of each USE.
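
The register-usage change above skips Pack and Unpack when collecting the recipes that define liveness intervals. The sketch below models that filtering with hypothetical, simplified types; the rationale given in the comment (that these recipes only repackage values that are already live) is an assumption, not taken from the patch.

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for the opcodes referenced in the change.
enum class Opcode { Add, Pack, Unpack, Load };

struct Recipe {
  Opcode Op;
};

// Collect the recipes that should define register-pressure intervals, skipping
// Pack/Unpack, which (presumably) only repackage values that are already live.
static std::vector<const Recipe *>
collectIntervalRecipes(const std::vector<Recipe> &Block) {
  std::vector<const Recipe *> Idx2Recipe;
  for (const Recipe &R : Block) {
    if (R.Op == Opcode::Pack || R.Op == Opcode::Unpack)
      continue;
    Idx2Recipe.push_back(&R);
  }
  return Idx2Recipe;
}

int main() {
  std::vector<Recipe> Block = {
      {Opcode::Load}, {Opcode::Pack}, {Opcode::Add}, {Opcode::Unpack}};
  std::printf("%zu recipes counted\n", collectIntervalRecipes(Block).size()); // 2
  return 0;
}
```
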
20 changes: 20 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -224,6 +224,9 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;

auto *VPI = dyn_cast<VPInstruction>(R);
if (VPI && VPI->getOpcode() == VPInstruction::BuildVector)
return true;
assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
"recipe with matched opcode does not have the expected number of "
"operands");
@@ -263,6 +266,10 @@ }
}
};

template <unsigned Opcode, typename... RecipeTys>
using ZeroOpRecipe_match =
Recipe_match<std::tuple<>, Opcode, false, RecipeTys...>;

template <typename Op0_t, unsigned Opcode, typename... RecipeTys>
using UnaryRecipe_match =
Recipe_match<std::tuple<Op0_t>, Opcode, false, RecipeTys...>;
@@ -271,6 +278,9 @@ template <typename Op0_t, unsigned Opcode>
using UnaryVPInstruction_match =
UnaryRecipe_match<Op0_t, Opcode, VPInstruction>;

template <unsigned Opcode>
using ZeroOpVPInstruction_match = ZeroOpRecipe_match<Opcode, VPInstruction>;

template <typename Op0_t, unsigned Opcode>
using AllUnaryRecipe_match =
UnaryRecipe_match<Op0_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
@@ -302,6 +312,10 @@ using AllBinaryRecipe_match =
BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPWidenRecipe,
VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>;

inline ZeroOpVPInstruction_match<VPInstruction::BuildVector> m_BuildVector() {
return ZeroOpVPInstruction_match<VPInstruction::BuildVector>();
}

template <unsigned Opcode, typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, Opcode>
m_VPInstruction(const Op0_t &Op0) {
@@ -364,6 +378,12 @@ m_Broadcast(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
}

template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::ExtractLastElement>
m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}

template <typename Op0_t, typename Op1_t>
inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::ActiveLaneMask>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {
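
The new ZeroOpRecipe_match/ZeroOpVPInstruction_match alias specializes the generic matcher with an empty operand tuple so that m_BuildVector() can match on the opcode alone; this is needed because BuildVector takes a variable number of operands, which is also why Recipe_match::match now returns early for it before the operand-count assert. A standalone sketch of that template pattern, with hypothetical simplified types rather than the real VPlanPatternMatch machinery:

```cpp
#include <cassert>
#include <tuple>
#include <vector>

// Hypothetical, simplified stand-ins for a recipe with an opcode and operands.
enum : unsigned { OpBuildVector = 1, OpBroadcast = 2 };

struct Recipe {
  unsigned Opcode;
  std::vector<int> Operands; // operand count varies (e.g. one per lane)
};

// Generic matcher keyed on an opcode and a tuple of operand sub-patterns.
// With an empty tuple it matches on the opcode alone, mirroring what
// ZeroOpRecipe_match / m_BuildVector() provide for the variadic BuildVector.
template <typename Ops_t, unsigned Opcode> struct RecipeMatch {
  bool match(const Recipe &R) const {
    if (R.Opcode != Opcode)
      return false;
    // Only enforce the operand count when operand sub-patterns are given.
    return std::tuple_size<Ops_t>::value == 0 ||
           R.Operands.size() == std::tuple_size<Ops_t>::value;
  }
};

template <unsigned Opcode> using ZeroOpMatch = RecipeMatch<std::tuple<>, Opcode>;

inline ZeroOpMatch<OpBuildVector> m_BuildVector() { return {}; }

int main() {
  Recipe BV{OpBuildVector, {1, 2, 3, 4}};
  Recipe BC{OpBroadcast, {7}};
  assert(m_BuildVector().match(BV));
  assert(!m_BuildVector().match(BC));
  return 0;
}
```
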