Skip to content

Commit f7f90bc

Browse files
[LV] Support binary and unary operations with EVL-vectorization
The patch adds `VPWidenEVLRecipe`, which represents a `VPWidenRecipe` plus an EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for each binary and unary operation. Follow-up patches will extend support to the remaining cases, such as `FCmp` and `ICmp`.
1 parent 600ff28 commit f7f90bc

File tree

8 files changed

+1966
-50
lines changed

8 files changed

+1966
-50
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
870870
case VPRecipeBase::VPWidenCastSC:
871871
case VPRecipeBase::VPWidenGEPSC:
872872
case VPRecipeBase::VPWidenSC:
873+
case VPRecipeBase::VPWidenEVLSC:
873874
case VPRecipeBase::VPWidenSelectSC:
874875
case VPRecipeBase::VPBlendSC:
875876
case VPRecipeBase::VPPredInstPHISC:
@@ -1054,6 +1055,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10541055
static inline bool classof(const VPRecipeBase *R) {
10551056
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
10561057
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1058+
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
10571059
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10581060
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10591061
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1345,13 +1347,18 @@ class VPInstruction : public VPRecipeWithIRFlags {
13451347
/// ingredient. This recipe covers most of the traditional vectorization cases
13461348
/// where each ingredient transforms into a vectorized version of itself.
13471349
class VPWidenRecipe : public VPRecipeWithIRFlags {
1350+
protected:
13481351
unsigned Opcode;
13491352

1353+
template <typename IterT>
1354+
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1355+
iterator_range<IterT> Operands)
1356+
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1357+
13501358
public:
13511359
template <typename IterT>
13521360
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1353-
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1354-
Opcode(I.getOpcode()) {}
1361+
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
13551362

13561363
~VPWidenRecipe() override = default;
13571364

@@ -1375,6 +1382,49 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
13751382
#endif
13761383
};
13771384

1385+
class VPWidenEVLRecipe : public VPWidenRecipe {
1386+
private:
1387+
using VPRecipeWithIRFlags::transferFlags;
1388+
1389+
public:
1390+
template <typename IterT>
1391+
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1392+
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1393+
addOperand(&EVL);
1394+
}
1395+
1396+
~VPWidenEVLRecipe() override = default;
1397+
1398+
VPWidenRecipe *clone() override final {
1399+
SmallVector<VPValue *> Ops(operands());
1400+
VPValue *EVL = Ops.pop_back_val();
1401+
auto *R = new VPWidenEVLRecipe(*getUnderlyingInstr(),
1402+
make_range(Ops.begin(), Ops.end()), *EVL);
1403+
R->transferFlags(*this);
1404+
return R;
1405+
}
1406+
1407+
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1408+
1409+
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1410+
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1411+
1412+
/// A helper function to create a widen EVL recipe from a regular widen recipe.
1413+
static VPWidenEVLRecipe *create(VPWidenRecipe *W, VPValue &EVL);
1414+
1415+
/// Produce widened copies of all Ingredients.
1416+
void execute(VPTransformState &State) override final;
1417+
1418+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1419+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
1420+
1421+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1422+
/// Print the recipe.
1423+
void print(raw_ostream &O, const Twine &Indent,
1424+
VPSlotTracker &SlotTracker) const override final;
1425+
#endif
1426+
};
1427+
13781428
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
13791429
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
13801430
/// Cast instruction opcode.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/Instructions.h"
2424
#include "llvm/IR/Type.h"
2525
#include "llvm/IR/Value.h"
26+
#include "llvm/IR/VectorBuilder.h"
2627
#include "llvm/Support/Casting.h"
2728
#include "llvm/Support/CommandLine.h"
2829
#include "llvm/Support/Debug.h"
@@ -71,6 +72,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7172
case VPWidenLoadSC:
7273
case VPWidenPHISC:
7374
case VPWidenSC:
75+
case VPWidenEVLSC:
7476
case VPWidenSelectSC: {
7577
const Instruction *I =
7678
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -110,6 +112,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
110112
case VPWidenIntOrFpInductionSC:
111113
case VPWidenPHISC:
112114
case VPWidenSC:
115+
case VPWidenEVLSC:
113116
case VPWidenSelectSC: {
114117
const Instruction *I =
115118
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -159,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
159162
case VPWidenPHISC:
160163
case VPWidenPointerInductionSC:
161164
case VPWidenSC:
165+
case VPWidenEVLSC:
162166
case VPWidenSelectSC: {
163167
const Instruction *I =
164168
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1050,6 +1054,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
10501054
#endif
10511055
}
10521056

1057+
VPWidenEVLRecipe *VPWidenEVLRecipe::create(VPWidenRecipe *W, VPValue &EVL) {
1058+
auto *R = new VPWidenEVLRecipe(*W->getUnderlyingInstr(), W->operands(), EVL);
1059+
R->transferFlags(*W);
1060+
return R;
1061+
}
1062+
1063+
void VPWidenEVLRecipe::execute(VPTransformState &State) {
1064+
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
1065+
"explicit vector length.");
1066+
VPValue *Op0 = getOperand(0);
1067+
1068+
// If it's a scalar operation, hand translation over to VPWidenRecipe.
1069+
if (!State.get(Op0, 0)->getType()->isVectorTy())
1070+
return VPWidenRecipe::execute(State);
1071+
1072+
VPValue *EVL = getEVL();
1073+
Value *EVLArg = State.get(EVL, 0, /*NeedsScalar=*/true);
1074+
unsigned Opcode = getOpcode();
1075+
Instruction *I = getUnderlyingInstr();
1076+
IRBuilderBase &BuilderIR = State.Builder;
1077+
VectorBuilder Builder(BuilderIR);
1078+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1079+
Value *VPInst = nullptr;
1080+
1081+
//===------------------- Binary and Unary Ops ---------------------===//
1082+
if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) {
1083+
// Just widen unops and binops.
1084+
1085+
SmallVector<Value *, 4> Ops;
1086+
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1087+
VPValue *VPOp = getOperand(I);
1088+
Ops.push_back(State.get(VPOp, 0));
1089+
}
1090+
1091+
Builder.setMask(Mask).setEVL(EVLArg);
1092+
VPInst = Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops,
1093+
"vp.op");
1094+
1095+
if (I)
1096+
if (auto *VecOp = dyn_cast<Instruction>(VPInst))
1097+
VecOp->copyIRFlags(I);
1098+
} else {
1099+
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1100+
}
1101+
State.set(this, VPInst, 0);
1102+
State.addMetadata(VPInst, I);
1103+
}
1104+
1105+
bool VPWidenEVLRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1106+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1107+
// EVL in that recipe is always the last operand, thus any use before means
1108+
// the VPValue should be vectorized.
1109+
for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
1110+
if (getOperand(I) == Op)
1111+
return false;
1112+
return true;
1113+
}
1114+
10531115
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10541116
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
10551117
VPSlotTracker &SlotTracker) const {
@@ -1059,6 +1121,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
10591121
printFlags(O);
10601122
printOperands(O, SlotTracker);
10611123
}
1124+
1125+
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1126+
VPSlotTracker &SlotTracker) const {
1127+
O << Indent << "WIDEN vp ";
1128+
printAsOperand(O, SlotTracker);
1129+
O << " = " << Instruction::getOpcodeName(Opcode);
1130+
printFlags(O);
1131+
printOperands(O, SlotTracker);
1132+
}
10621133
#endif
10631134

10641135
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/ADT/PostOrderIterator.h"
2121
#include "llvm/ADT/STLExtras.h"
2222
#include "llvm/ADT/SetVector.h"
23+
#include "llvm/ADT/TypeSwitch.h"
2324
#include "llvm/Analysis/IVDescriptors.h"
2425
#include "llvm/Analysis/VectorUtils.h"
2526
#include "llvm/IR/Intrinsics.h"
@@ -1307,7 +1308,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
13071308
/// WideCanonicalIV, backedge-taken-count) pattern.
13081309
/// TODO: Introduce explicit recipe for header-mask instead of searching
13091310
/// for the header-mask pattern manually.
1310-
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1311+
static DenseSet<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13111312
SmallVector<VPValue *> WideCanonicalIVs;
13121313
auto *FoundWidenCanonicalIVUser =
13131314
find_if(Plan.getCanonicalIV()->users(),
@@ -1333,7 +1334,8 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13331334

13341335
// Walk users of wide canonical IVs and collect all compares of the form
13351336
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1336-
SmallVector<VPValue *> HeaderMasks;
1337+
DenseSet<VPValue *> HeaderMasks;
1338+
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
13371339
for (auto *Wide : WideCanonicalIVs) {
13381340
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
13391341
auto *HeaderMask = dyn_cast<VPInstruction>(U);
@@ -1342,7 +1344,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
13421344

13431345
assert(HeaderMask->getOperand(0) == Wide &&
13441346
"WidenCanonicalIV must be the first operand of the compare");
1345-
HeaderMasks.push_back(HeaderMask);
1347+
HeaderMasks.insert(HeaderMask);
13461348
}
13471349
}
13481350
return HeaderMasks;
@@ -1381,6 +1383,56 @@ void VPlanTransforms::addActiveLaneMask(
13811383
HeaderMask->replaceAllUsesWith(LaneMask);
13821384
}
13831385

1386+
/// Replace recipes with their EVL variants.
1387+
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1388+
DenseSet<VPRecipeBase *> ToRemove;
1389+
1390+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1391+
Plan.getEntry());
1392+
DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1393+
for (VPBasicBlock *VPBB :
1394+
reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
1395+
// The recipes in the block are processed in reverse order, to catch chains
1396+
// of dead recipes.
1397+
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
1398+
TypeSwitch<VPRecipeBase *>(&R)
1399+
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
1400+
VPValue *NewMask =
1401+
HeaderMasks.contains(L->getMask()) ? nullptr : L->getMask();
1402+
auto *N = new VPWidenLoadEVLRecipe(L, &EVL, NewMask);
1403+
N->insertBefore(L);
1404+
L->replaceAllUsesWith(N);
1405+
ToRemove.insert(L);
1406+
})
1407+
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
1408+
VPValue *NewMask =
1409+
HeaderMasks.contains(S->getMask()) ? nullptr : S->getMask();
1410+
auto *N = new VPWidenStoreEVLRecipe(S, &EVL, NewMask);
1411+
N->insertBefore(S);
1412+
ToRemove.insert(S);
1413+
})
1414+
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) {
1415+
unsigned Opcode = W->getOpcode();
1416+
if (!Instruction::isBinaryOp(Opcode) &&
1417+
!Instruction::isUnaryOp(Opcode))
1418+
return;
1419+
auto *N = VPWidenEVLRecipe::create(W, EVL);
1420+
N->insertBefore(W);
1421+
W->replaceAllUsesWith(N);
1422+
ToRemove.insert(W);
1423+
});
1424+
}
1425+
}
1426+
1427+
for (VPRecipeBase *R : ToRemove)
1428+
R->eraseFromParent();
1429+
1430+
for (VPValue *HeaderMask : HeaderMasks)
1431+
recursivelyDeleteDeadRecipes(HeaderMask);
1432+
}
1433+
1434+
1435+
13841436
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
13851437
/// replaces all uses except the canonical IV increment of
13861438
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1441,29 +1493,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14411493
NextEVLIV->insertBefore(CanonicalIVIncrement);
14421494
EVLPhi->addOperand(NextEVLIV);
14431495

1444-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1445-
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1446-
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1447-
if (!MemR)
1448-
continue;
1449-
VPValue *OrigMask = MemR->getMask();
1450-
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
1451-
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1452-
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1453-
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1454-
N->insertBefore(L);
1455-
L->replaceAllUsesWith(N);
1456-
L->eraseFromParent();
1457-
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1458-
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1459-
N->insertBefore(S);
1460-
S->eraseFromParent();
1461-
} else {
1462-
llvm_unreachable("unsupported recipe");
1463-
}
1464-
}
1465-
recursivelyDeleteDeadRecipes(HeaderMask);
1466-
}
1496+
transformRecipestoEVLRecipes(Plan, *VPEVL);
1497+
14671498
// Replace all uses of VPCanonicalIVPHIRecipe by
14681499
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
14691500
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ class VPDef {
356356
VPWidenStoreEVLSC,
357357
VPWidenStoreSC,
358358
VPWidenSC,
359+
VPWidenEVLSC,
359360
VPWidenSelectSC,
360361
VPBlendSC,
361362
// START: Phi-like recipes. Need to be kept together.

0 commit comments

Comments
 (0)