Skip to content

Commit 7f56a9c

Browse files
committed
[VPlan] Use VPWidenIntrinsicRecipe to support binary and unary operations with EVL-vectorization
1 parent d9e2fb7 commit 7f56a9c

File tree

11 files changed

+91
-149
lines changed

11 files changed

+91
-149
lines changed

llvm/include/llvm/IR/VectorBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit a VP reduction intrinsic call for recurrence kind.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param ID The ID of the intrinsic to emit a call to.
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
107107
ArrayRef<Value *> VecOpArray,
108108
const Twine &Name = Twine());
109109
};

llvm/lib/IR/VectorBuilder.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 13 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
867867
case VPRecipeBase::VPWidenGEPSC:
868868
case VPRecipeBase::VPWidenIntrinsicSC:
869869
case VPRecipeBase::VPWidenSC:
870-
case VPRecipeBase::VPWidenEVLSC:
871870
case VPRecipeBase::VPWidenSelectSC:
872871
case VPRecipeBase::VPBlendSC:
873872
case VPRecipeBase::VPPredInstPHISC:
@@ -1058,7 +1057,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10581057
static inline bool classof(const VPRecipeBase *R) {
10591058
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
10601059
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1061-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
10621060
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10631061
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10641062
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1438,16 +1436,11 @@ class VPIRInstruction : public VPRecipeBase {
14381436
class VPWidenRecipe : public VPRecipeWithIRFlags {
14391437
unsigned Opcode;
14401438

1441-
protected:
1442-
template <typename IterT>
1443-
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1444-
iterator_range<IterT> Operands)
1445-
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1446-
14471439
public:
14481440
template <typename IterT>
14491441
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1450-
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1442+
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1443+
Opcode(I.getOpcode()) {}
14511444

14521445
~VPWidenRecipe() override = default;
14531446

@@ -1457,15 +1450,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14571450
return R;
14581451
}
14591452

1460-
static inline bool classof(const VPRecipeBase *R) {
1461-
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1462-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1463-
}
1464-
1465-
static inline bool classof(const VPUser *U) {
1466-
auto *R = dyn_cast<VPRecipeBase>(U);
1467-
return R && classof(R);
1468-
}
1453+
VP_CLASSOF_IMPL(VPDef::VPWidenSC)
14691454

14701455
/// Produce a widened instruction using the opcode and operands of the recipe,
14711456
/// processing State.VF elements.
@@ -1484,54 +1469,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14841469
#endif
14851470
};
14861471

1487-
/// A recipe for widening operations with vector-predication intrinsics with
1488-
/// explicit vector length (EVL).
1489-
class VPWidenEVLRecipe : public VPWidenRecipe {
1490-
using VPRecipeWithIRFlags::transferFlags;
1491-
1492-
public:
1493-
template <typename IterT>
1494-
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1495-
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1496-
addOperand(&EVL);
1497-
}
1498-
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
1499-
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1500-
transferFlags(W);
1501-
}
1502-
1503-
~VPWidenEVLRecipe() override = default;
1504-
1505-
VPWidenRecipe *clone() override final {
1506-
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1507-
return nullptr;
1508-
}
1509-
1510-
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1511-
1512-
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1513-
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1514-
1515-
/// Produce a vp-intrinsic using the opcode and operands of the recipe,
1516-
/// processing EVL elements.
1517-
void execute(VPTransformState &State) override final;
1518-
1519-
/// Returns true if the recipe only uses the first lane of operand \p Op.
1520-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1521-
assert(is_contained(operands(), Op) &&
1522-
"Op must be an operand of the recipe");
1523-
// EVL in that recipe is always the last operand, thus any use before means
1524-
// the VPValue should be vectorized.
1525-
return getEVL() == Op;
1526-
}
1527-
1528-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1529-
/// Print the recipe.
1530-
void print(raw_ostream &O, const Twine &Indent,
1531-
VPSlotTracker &SlotTracker) const override final;
1532-
#endif
1533-
};
1534-
15351472
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
15361473
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
15371474
/// Cast instruction opcode.
@@ -1657,6 +1594,16 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16571594
MayWriteToMemory(CI.mayWriteToMemory()),
16581595
MayHaveSideEffects(CI.mayHaveSideEffects()) {}
16591596

1597+
template <typename IterT>
1598+
VPWidenIntrinsicRecipe(Instruction &I, Intrinsic::ID VectorIntrinsicID,
1599+
iterator_range<IterT> Operands, Type *Ty,
1600+
DebugLoc DL = {})
1601+
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, Operands, I),
1602+
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1603+
MayReadFromMemory(I.mayReadFromMemory()),
1604+
MayWriteToMemory(I.mayWriteToMemory()),
1605+
MayHaveSideEffects(I.mayHaveSideEffects()) {}
1606+
16601607
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
16611608
ArrayRef<VPValue *> CallArguments, Type *Ty,
16621609
DebugLoc DL = {})

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
267267
[this](const VPRecipeBase *R) {
268268
return inferScalarType(R->getOperand(0));
269269
})
270-
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
271-
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
272-
VPWidenSelectRecipe>(
270+
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
271+
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
273272
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
274273
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
275274
return R->getResultType();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 57 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
9898
case VPWidenLoadSC:
9999
case VPWidenPHISC:
100100
case VPWidenSC:
101-
case VPWidenEVLSC:
102101
case VPWidenSelectSC: {
103102
const Instruction *I =
104103
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -142,7 +141,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
142141
case VPWidenIntOrFpInductionSC:
143142
case VPWidenPHISC:
144143
case VPWidenSC:
145-
case VPWidenEVLSC:
146144
case VPWidenSelectSC: {
147145
const Instruction *I =
148146
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -183,7 +181,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
183181
case VPWidenPHISC:
184182
case VPWidenPointerInductionSC:
185183
case VPWidenSC:
186-
case VPWidenEVLSC:
187184
case VPWidenSelectSC: {
188185
const Instruction *I =
189186
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -957,24 +954,53 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
957954
Args.push_back(Arg);
958955
}
959956

960-
// Use vector version of the intrinsic.
961-
Module *M = State.Builder.GetInsertBlock()->getModule();
962-
Function *VectorF =
963-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
964-
assert(VectorF && "Can't retrieve vector intrinsic.");
957+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
958+
VectorIntrinsicID != Intrinsic::vp_select) {
959+
VectorBuilder VBuilder(State.Builder);
960+
Value *Mask =
961+
State.Builder.CreateVectorSplat(State.VF, State.Builder.getTrue());
962+
VBuilder.setMask(Mask).setEVL(Args.back());
963+
// Remove EVL from Args
964+
Args.pop_back();
965+
966+
if (VectorIntrinsicID == Intrinsic::vp_icmp ||
967+
VectorIntrinsicID == Intrinsic::vp_fcmp) {
968+
auto &Ctx = State.Builder.getContext();
969+
Value *Pred = MetadataAsValue::get(
970+
Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
971+
Args.push_back(Pred);
972+
}
965973

966-
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
967-
SmallVector<OperandBundleDef, 1> OpBundles;
968-
if (CI)
969-
CI->getOperandBundlesAsDefs(OpBundles);
974+
Value *VPInst = VBuilder.createSimpleIntrinsic(
975+
VectorIntrinsicID, TysForDecl[0], Args, "vp.call");
970976

971-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
977+
if (isa<FPMathOperator>(VPInst))
978+
setFlags(cast<Instruction>(VPInst));
972979

973-
setFlags(V);
980+
if (!VPInst->getType()->isVoidTy())
981+
State.set(this, VPInst);
982+
State.addMetadata(VPInst,
983+
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
984+
} else {
985+
// Use vector version of the intrinsic.
986+
Module *M = State.Builder.GetInsertBlock()->getModule();
987+
Function *VectorF =
988+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
989+
assert(VectorF && "Can't retrieve vector intrinsic.");
974990

975-
if (!V->getType()->isVoidTy())
976-
State.set(this, V);
977-
State.addMetadata(V, CI);
991+
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
992+
SmallVector<OperandBundleDef, 1> OpBundles;
993+
if (CI)
994+
CI->getOperandBundlesAsDefs(OpBundles);
995+
996+
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
997+
998+
setFlags(V);
999+
1000+
if (!V->getType()->isVoidTy())
1001+
State.set(this, V);
1002+
State.addMetadata(V, CI);
1003+
}
9781004
}
9791005

9801006
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
@@ -1006,6 +1032,20 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10061032
ParamTys.push_back(
10071033
ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
10081034

1035+
// TODO: Implement in cost model
1036+
if (std::optional<unsigned> FOp =
1037+
VPIntrinsic::getFunctionalOpcodeForVP(VectorIntrinsicID)) {
1038+
if (FOp == Instruction::FNeg) {
1039+
// Instruction *CtxI =
1040+
dyn_cast_or_null<Instruction>(getUnderlyingValue());
1041+
Type *VectorTy = ToVectorTy(getResultType(), VF);
1042+
return Ctx.TTI.getArithmeticInstrCost(
1043+
FOp.value(), VectorTy, CostKind,
1044+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1045+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
1046+
}
1047+
}
1048+
10091049
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
10101050
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
10111051
IntrinsicCostAttributes CostAttrs(
@@ -1417,42 +1457,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
14171457
}
14181458
}
14191459

1420-
void VPWidenEVLRecipe::execute(VPTransformState &State) {
1421-
unsigned Opcode = getOpcode();
1422-
// TODO: Support other opcodes
1423-
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1424-
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1425-
1426-
State.setDebugLocFrom(getDebugLoc());
1427-
1428-
assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1429-
"VPWidenEVLRecipe should not be used for scalars");
1430-
1431-
VPValue *EVL = getEVL();
1432-
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1433-
IRBuilderBase &BuilderIR = State.Builder;
1434-
VectorBuilder Builder(BuilderIR);
1435-
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1436-
1437-
SmallVector<Value *, 4> Ops;
1438-
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1439-
VPValue *VPOp = getOperand(I);
1440-
Ops.push_back(State.get(VPOp));
1441-
}
1442-
1443-
Builder.setMask(Mask).setEVL(EVLArg);
1444-
Value *VPInst =
1445-
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1446-
// Currently vp-intrinsics only accept FMF flags.
1447-
// TODO: Enable other flags when support is added.
1448-
if (isa<FPMathOperator>(VPInst))
1449-
setFlags(cast<Instruction>(VPInst));
1450-
1451-
State.set(this, VPInst);
1452-
State.addMetadata(VPInst,
1453-
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1454-
}
1455-
14561460
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
14571461
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
14581462
VPSlotTracker &SlotTracker) const {
@@ -1462,15 +1466,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
14621466
printFlags(O);
14631467
printOperands(O, SlotTracker);
14641468
}
1465-
1466-
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1467-
VPSlotTracker &SlotTracker) const {
1468-
O << Indent << "WIDEN ";
1469-
printAsOperand(O, SlotTracker);
1470-
O << " = vp." << Instruction::getOpcodeName(getOpcode());
1471-
printFlags(O);
1472-
printOperands(O, SlotTracker);
1473-
}
14741469
#endif
14751470

14761471
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1465,10 +1465,17 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
14651465
})
14661466
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
14671467
unsigned Opcode = W->getOpcode();
1468+
// TODO: Support other opcodes
14681469
if (!Instruction::isBinaryOp(Opcode) &&
14691470
!Instruction::isUnaryOp(Opcode))
14701471
return nullptr;
1471-
return new VPWidenEVLRecipe(*W, EVL);
1472+
auto *I = cast<Instruction>(W->getUnderlyingInstr());
1473+
SmallVector<VPValue *> Ops(W->operands());
1474+
Ops.push_back(&EVL);
1475+
Intrinsic::ID VPID = VPIntrinsic::getForOpcode(W->getOpcode());
1476+
return new VPWidenIntrinsicRecipe(
1477+
*I, VPID, make_range(Ops.begin(), Ops.end()), I->getType(),
1478+
I->getDebugLoc());
14721479
})
14731480
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
14741481
VPValue *NewMask = GetNewMask(Red->getCondOp());

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,6 @@ class VPDef {
344344
VPWidenStoreEVLSC,
345345
VPWidenStoreSC,
346346
VPWidenSC,
347-
VPWidenEVLSC,
348347
VPWidenSelectSC,
349348
VPBlendSC,
350349
VPHistogramSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
146146
.Case<VPWidenLoadEVLRecipe>([&](const VPWidenLoadEVLRecipe *L) {
147147
return VerifyEVLUse(*L, 1);
148148
})
149-
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
150-
return VerifyEVLUse(
151-
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
152-
})
153149
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
154150
return VerifyEVLUse(*R, 2);
155151
})

0 commit comments

Comments
 (0)