Skip to content

Commit a84848b

Browse files
[LV] Support binary and unary operations with EVL-vectorization
This patch adds `VPWidenEVLRecipe`, which represents a `VPWidenRecipe` with an additional EVL (explicit vector length) argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for every binary and unary operation. Follow-up patches will extend support to the remaining cases, such as `FCmp` and `ICmp`.
1 parent 389142e commit a84848b

File tree

8 files changed

+2001
-50
lines changed

8 files changed

+2001
-50
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
861861
case VPRecipeBase::VPWidenCastSC:
862862
case VPRecipeBase::VPWidenGEPSC:
863863
case VPRecipeBase::VPWidenSC:
864+
case VPRecipeBase::VPWidenEVLSC:
864865
case VPRecipeBase::VPWidenSelectSC:
865866
case VPRecipeBase::VPBlendSC:
866867
case VPRecipeBase::VPPredInstPHISC:
@@ -1045,6 +1046,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10451046
static inline bool classof(const VPRecipeBase *R) {
10461047
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
10471048
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1049+
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
10481050
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10491051
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10501052
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1336,13 +1338,18 @@ class VPInstruction : public VPRecipeWithIRFlags {
13361338
/// ingredient. This recipe covers most of the traditional vectorization cases
13371339
/// where each ingredient transforms into a vectorized version of itself.
13381340
class VPWidenRecipe : public VPRecipeWithIRFlags {
1341+
protected:
13391342
unsigned Opcode;
13401343

1344+
template <typename IterT>
1345+
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1346+
iterator_range<IterT> Operands)
1347+
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1348+
13411349
public:
13421350
template <typename IterT>
13431351
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1344-
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1345-
Opcode(I.getOpcode()) {}
1352+
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
13461353

13471354
~VPWidenRecipe() override = default;
13481355

@@ -1366,6 +1373,49 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
13661373
#endif
13671374
};
13681375

1376+
class VPWidenEVLRecipe : public VPWidenRecipe {
1377+
private:
1378+
using VPRecipeWithIRFlags::transferFlags;
1379+
1380+
public:
1381+
template <typename IterT>
1382+
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1383+
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1384+
addOperand(&EVL);
1385+
}
1386+
1387+
~VPWidenEVLRecipe() override = default;
1388+
1389+
VPWidenRecipe *clone() override final {
1390+
SmallVector<VPValue *> Ops(operands());
1391+
VPValue *EVL = Ops.pop_back_val();
1392+
auto *R = new VPWidenEVLRecipe(*getUnderlyingInstr(),
1393+
make_range(Ops.begin(), Ops.end()), *EVL);
1394+
R->transferFlags(*this);
1395+
return R;
1396+
}
1397+
1398+
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1399+
1400+
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1401+
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1402+
1403+
/// A helper function to create widen EVL recipe from regular widen recipe.
1404+
static VPWidenEVLRecipe *create(VPWidenRecipe *W, VPValue &EVL);
1405+
1406+
/// Produce widened copies of all Ingredients.
1407+
void execute(VPTransformState &State) override final;
1408+
1409+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1410+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
1411+
1412+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1413+
/// Print the recipe.
1414+
void print(raw_ostream &O, const Twine &Indent,
1415+
VPSlotTracker &SlotTracker) const override final;
1416+
#endif
1417+
};
1418+
13691419
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
13701420
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
13711421
/// Cast instruction opcode.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/Instructions.h"
2424
#include "llvm/IR/Type.h"
2525
#include "llvm/IR/Value.h"
26+
#include "llvm/IR/VectorBuilder.h"
2627
#include "llvm/Support/Casting.h"
2728
#include "llvm/Support/CommandLine.h"
2829
#include "llvm/Support/Debug.h"
@@ -71,6 +72,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7172
case VPWidenLoadSC:
7273
case VPWidenPHISC:
7374
case VPWidenSC:
75+
case VPWidenEVLSC:
7476
case VPWidenSelectSC: {
7577
const Instruction *I =
7678
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -110,6 +112,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
110112
case VPWidenIntOrFpInductionSC:
111113
case VPWidenPHISC:
112114
case VPWidenSC:
115+
case VPWidenEVLSC:
113116
case VPWidenSelectSC: {
114117
const Instruction *I =
115118
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -159,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
159162
case VPWidenPHISC:
160163
case VPWidenPointerInductionSC:
161164
case VPWidenSC:
165+
case VPWidenEVLSC:
162166
case VPWidenSelectSC: {
163167
const Instruction *I =
164168
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1049,6 +1053,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
10491053
#endif
10501054
}
10511055

1056+
/// Build the EVL counterpart of the widen recipe \p W: the new recipe reuses
/// the underlying instruction and the operands of \p W, gets \p EVL as an
/// additional operand, and receives the flags of \p W. The caller is
/// responsible for inserting the returned recipe.
VPWidenEVLRecipe *VPWidenEVLRecipe::create(VPWidenRecipe *W, VPValue &EVL) {
  Instruction &Underlying = *W->getUnderlyingInstr();
  auto *EVLRecipe = new VPWidenEVLRecipe(Underlying, W->operands(), EVL);
  EVLRecipe->transferFlags(*W);
  return EVLRecipe;
}
1061+
1062+
/// Generate the EVL-predicated form of this recipe's unary or binary
/// operation for part 0.
///
/// The operation is emitted through VectorBuilder with an all-true mask of
/// State.VF elements and the scalar value of the EVL operand. Opcodes other
/// than unary and binary operations are not supported.
void VPWidenEVLRecipe::execute(VPTransformState &State) {
  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
                          "explicit vector length.");

  // If it's a scalar operation, hand translation over to VPWidenRecipe.
  // NOTE(review): the base implementation runs on this recipe, which still
  // carries the trailing EVL operand - confirm it tolerates the extra operand.
  if (!State.get(getOperand(0), 0)->getType()->isVectorTy())
    return VPWidenRecipe::execute(State);

  unsigned Opcode = getOpcode();
  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

  // The EVL is consumed as a scalar; the mask is a splat of `true`.
  Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
  Instruction *UnderlyingI = getUnderlyingInstr();
  IRBuilderBase &BuilderIR = State.Builder;
  VectorBuilder Builder(BuilderIR);
  Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());

  // Just widen unops and binops. The last operand is the EVL and is not an
  // input of the operation itself, so it is left out here.
  SmallVector<Value *, 4> Ops;
  for (unsigned Idx = 0, E = getNumOperands() - 1; Idx < E; ++Idx)
    Ops.push_back(State.get(getOperand(Idx), 0));

  Builder.setMask(Mask).setEVL(EVLArg);
  Value *VPInst = Builder.createVectorInstruction(Opcode, Ops[0]->getType(),
                                                  Ops, "vp.op");

  // NOTE(review): flags are copied from the underlying IR instruction, not
  // from the flags stored on the recipe - confirm this is intended.
  if (UnderlyingI)
    if (auto *VecOp = dyn_cast<Instruction>(VPInst))
      VecOp->copyIRFlags(UnderlyingI);

  State.set(this, VPInst, 0);
  State.addMetadata(VPInst, UnderlyingI);
}
1103+
1104+
bool VPWidenEVLRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1105+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1106+
// EVL in that recipe is always the last operand, thus any use before means
1107+
// the VPValue should be vectorized.
1108+
for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
1109+
if (getOperand(I) == Op)
1110+
return false;
1111+
return true;
1112+
}
1113+
10521114
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10531115
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
10541116
VPSlotTracker &SlotTracker) const {
@@ -1058,6 +1120,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
10581120
printFlags(O);
10591121
printOperands(O, SlotTracker);
10601122
}
1123+
1124+
/// Print the recipe as "WIDEN vp <result> = <opcode name>", followed by its
/// flags and operands, with every line prefixed by \p Indent.
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN vp ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  O << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
10611132
#endif
10621133

10631134
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/ADT/PostOrderIterator.h"
2121
#include "llvm/ADT/STLExtras.h"
2222
#include "llvm/ADT/SetVector.h"
23+
#include "llvm/ADT/TypeSwitch.h"
2324
#include "llvm/Analysis/IVDescriptors.h"
2425
#include "llvm/Analysis/VectorUtils.h"
2526
#include "llvm/IR/Intrinsics.h"
@@ -1307,7 +1308,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
13071308
/// WideCanonicalIV, backedge-taken-count) pattern.
13081309
/// TODO: Introduce explicit recipe for header-mask instead of searching
13091310
/// for the header-mask pattern manually.
1310-
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1311+
static DenseSet<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13111312
SmallVector<VPValue *> WideCanonicalIVs;
13121313
auto *FoundWidenCanonicalIVUser =
13131314
find_if(Plan.getCanonicalIV()->users(),
@@ -1333,7 +1334,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13331334

13341335
// Walk users of wide canonical IVs and collect to all compares of the form
13351336
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1336-
SmallVector<VPValue *> HeaderMasks;
1337+
DenseSet<VPValue *> HeaderMasks;
13371338
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
13381339
for (auto *Wide : WideCanonicalIVs) {
13391340
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
@@ -1345,7 +1346,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13451346

13461347
assert(HeaderMask->getOperand(0) == Wide &&
13471348
"WidenCanonicalIV must be the first operand of the compare");
1348-
HeaderMasks.push_back(HeaderMask);
1349+
HeaderMasks.insert(HeaderMask);
13491350
}
13501351
}
13511352
return HeaderMasks;
@@ -1384,6 +1385,56 @@ void VPlanTransforms::addActiveLaneMask(
13841385
HeaderMask->replaceAllUsesWith(LaneMask);
13851386
}
13861387

1388+
/// Replace recipes with their EVL variants.
1389+
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1390+
DenseSet<VPRecipeBase *> ToRemove;
1391+
1392+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1393+
Plan.getEntry());
1394+
DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1395+
for (VPBasicBlock *VPBB :
1396+
reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
1397+
// The recipes in the block are processed in reverse order, to catch chains
1398+
// of dead recipes.
1399+
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
1400+
TypeSwitch<VPRecipeBase *>(&R)
1401+
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
1402+
VPValue *NewMask =
1403+
HeaderMasks.contains(L->getMask()) ? nullptr : L->getMask();
1404+
auto *N = new VPWidenLoadEVLRecipe(L, &EVL, NewMask);
1405+
N->insertBefore(L);
1406+
L->replaceAllUsesWith(N);
1407+
ToRemove.insert(L);
1408+
})
1409+
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
1410+
VPValue *NewMask =
1411+
HeaderMasks.contains(S->getMask()) ? nullptr : S->getMask();
1412+
auto *N = new VPWidenStoreEVLRecipe(S, &EVL, NewMask);
1413+
N->insertBefore(S);
1414+
ToRemove.insert(S);
1415+
})
1416+
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) {
1417+
unsigned Opcode = W->getOpcode();
1418+
if (!Instruction::isBinaryOp(Opcode) &&
1419+
!Instruction::isUnaryOp(Opcode))
1420+
return;
1421+
auto *N = VPWidenEVLRecipe::create(W, EVL);
1422+
N->insertBefore(W);
1423+
W->replaceAllUsesWith(N);
1424+
ToRemove.insert(W);
1425+
});
1426+
}
1427+
}
1428+
1429+
for (VPRecipeBase *R : ToRemove)
1430+
R->eraseFromParent();
1431+
1432+
for (VPValue *HeaderMask : HeaderMasks)
1433+
recursivelyDeleteDeadRecipes(HeaderMask);
1434+
}
1435+
1436+
1437+
13871438
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
13881439
/// replaces all uses except the canonical IV increment of
13891440
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1444,29 +1495,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14441495
NextEVLIV->insertBefore(CanonicalIVIncrement);
14451496
EVLPhi->addOperand(NextEVLIV);
14461497

1447-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1448-
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1449-
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1450-
if (!MemR)
1451-
continue;
1452-
VPValue *OrigMask = MemR->getMask();
1453-
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
1454-
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1455-
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1456-
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1457-
N->insertBefore(L);
1458-
L->replaceAllUsesWith(N);
1459-
L->eraseFromParent();
1460-
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1461-
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1462-
N->insertBefore(S);
1463-
S->eraseFromParent();
1464-
} else {
1465-
llvm_unreachable("unsupported recipe");
1466-
}
1467-
}
1468-
recursivelyDeleteDeadRecipes(HeaderMask);
1469-
}
1498+
transformRecipestoEVLRecipes(Plan, *VPEVL);
1499+
14701500
// Replace all uses of VPCanonicalIVPHIRecipe by
14711501
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
14721502
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ class VPDef {
356356
VPWidenStoreEVLSC,
357357
VPWidenStoreSC,
358358
VPWidenSC,
359+
VPWidenEVLSC,
359360
VPWidenSelectSC,
360361
VPBlendSC,
361362
// START: Phi-like recipes. Need to be kept together.

0 commit comments

Comments
 (0)