Skip to content

Commit 00e40c9

Browse files
[LV] Support binary and unary operations with EVL-vectorization (#93854)
The patch adds `VPWidenEVLRecipe`, which represents `VPWidenRecipe` plus an EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for each binary and unary operation. Follow-up patches will extend support to the remaining cases, such as `FCmp` and `ICmp`.
1 parent 2cb4d1b commit 00e40c9

11 files changed

+2122
-162
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -923,6 +923,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
923923
case VPRecipeBase::VPWidenCastSC:
924924
case VPRecipeBase::VPWidenGEPSC:
925925
case VPRecipeBase::VPWidenSC:
926+
case VPRecipeBase::VPWidenEVLSC:
926927
case VPRecipeBase::VPWidenSelectSC:
927928
case VPRecipeBase::VPBlendSC:
928929
case VPRecipeBase::VPPredInstPHISC:
@@ -1107,6 +1108,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
11071108
static inline bool classof(const VPRecipeBase *R) {
11081109
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
11091110
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1111+
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
11101112
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
11111113
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
11121114
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1410,11 +1412,16 @@ class VPInstruction : public VPRecipeWithIRFlags {
14101412
class VPWidenRecipe : public VPRecipeWithIRFlags {
14111413
unsigned Opcode;
14121414

1415+
protected:
1416+
template <typename IterT>
1417+
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1418+
iterator_range<IterT> Operands)
1419+
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1420+
14131421
public:
14141422
template <typename IterT>
14151423
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1416-
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1417-
Opcode(I.getOpcode()) {}
1424+
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
14181425

14191426
~VPWidenRecipe() override = default;
14201427

@@ -1424,7 +1431,15 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14241431
return R;
14251432
}
14261433

1427-
VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1434+
static inline bool classof(const VPRecipeBase *R) {
1435+
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1436+
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1437+
}
1438+
1439+
static inline bool classof(const VPUser *U) {
1440+
auto *R = dyn_cast<VPRecipeBase>(U);
1441+
return R && classof(R);
1442+
}
14281443

14291444
/// Produce a widened instruction using the opcode and operands of the recipe,
14301445
/// processing State.VF elements.
@@ -1443,6 +1458,54 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14431458
#endif
14441459
};
14451460

1461+
/// A recipe for widening operations with vector-predication intrinsics with
1462+
/// explicit vector length (EVL).
1463+
class VPWidenEVLRecipe : public VPWidenRecipe {
1464+
using VPRecipeWithIRFlags::transferFlags;
1465+
1466+
public:
1467+
template <typename IterT>
1468+
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1469+
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1470+
addOperand(&EVL);
1471+
}
1472+
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
1473+
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1474+
transferFlags(W);
1475+
}
1476+
1477+
~VPWidenEVLRecipe() override = default;
1478+
1479+
VPWidenRecipe *clone() override final {
1480+
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1481+
return nullptr;
1482+
}
1483+
1484+
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1485+
1486+
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1487+
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1488+
1489+
/// Produce a vp-intrinsic using the opcode and operands of the recipe,
1490+
/// processing EVL elements.
1491+
void execute(VPTransformState &State) override final;
1492+
1493+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1494+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1495+
assert(is_contained(operands(), Op) &&
1496+
"Op must be an operand of the recipe");
1497+
// EVL in that recipe is always the last operand, thus any use before means
1498+
// the VPValue should be vectorized.
1499+
return getEVL() == Op;
1500+
}
1501+
1502+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1503+
/// Print the recipe.
1504+
void print(raw_ostream &O, const Twine &Indent,
1505+
VPSlotTracker &SlotTracker) const override final;
1506+
#endif
1507+
};
1508+
14461509
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
14471510
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
14481511
/// Cast instruction opcode.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,9 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
263263
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
264264
return inferScalarType(R->getOperand(0));
265265
})
266-
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
267-
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
266+
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
267+
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
268+
VPWidenSelectRecipe>(
268269
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
269270
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
270271
// TODO: Use info from interleave group.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/IR/Instructions.h"
2525
#include "llvm/IR/Type.h"
2626
#include "llvm/IR/Value.h"
27+
#include "llvm/IR/VectorBuilder.h"
2728
#include "llvm/Support/Casting.h"
2829
#include "llvm/Support/CommandLine.h"
2930
#include "llvm/Support/Debug.h"
@@ -74,6 +75,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7475
case VPWidenLoadSC:
7576
case VPWidenPHISC:
7677
case VPWidenSC:
78+
case VPWidenEVLSC:
7779
case VPWidenSelectSC: {
7880
const Instruction *I =
7981
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -114,6 +116,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
114116
case VPWidenIntOrFpInductionSC:
115117
case VPWidenPHISC:
116118
case VPWidenSC:
119+
case VPWidenEVLSC:
117120
case VPWidenSelectSC: {
118121
const Instruction *I =
119122
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -164,6 +167,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
164167
case VPWidenPHISC:
165168
case VPWidenPointerInductionSC:
166169
case VPWidenSC:
170+
case VPWidenEVLSC:
167171
case VPWidenSelectSC: {
168172
const Instruction *I =
169173
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1262,6 +1266,45 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
12621266
}
12631267
}
12641268

1269+
/// Emit the widened operation for this recipe through VectorBuilder, using an
/// all-true mask and the recipe's EVL operand. Only binary and unary opcodes
/// are handled; any other opcode is reported as unreachable.
void VPWidenEVLRecipe::execute(VPTransformState &State) {
  unsigned Opcode = getOpcode();
  // TODO: Support other opcodes
  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

  State.setDebugLocFrom(getDebugLoc());
  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
                          "explicit vector length.");

  // The EVL is requested as a scalar; the mask is a splat of 'true' over VF.
  Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
  IRBuilderBase &BuilderIR = State.Builder;
  VectorBuilder Builder(BuilderIR);
  Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());

  // Collect every operand except the trailing EVL.
  const unsigned NumVecOps = getNumOperands() - 1;
  SmallVector<Value *, 4> Ops;
  Ops.reserve(NumVecOps);
  for (unsigned I = 0; I != NumVecOps; ++I)
    Ops.push_back(State.get(getOperand(I), 0));

  // Check the already-collected first operand instead of issuing a separate
  // State.get call that would exist only in builds with assertions enabled
  // (and would leave an unused local behind when assertions are disabled).
  assert(!Ops.empty() && "Expected at least one operand besides the EVL");
  assert(Ops[0]->getType()->isVectorTy() &&
         "VPWidenEVLRecipe should not be used for scalars");

  Builder.setMask(Mask).setEVL(EVLArg);
  Value *VPInst =
      Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
  // Currently vp-intrinsics only accept FMF flags.
  // TODO: Enable other flags when support is added.
  if (isa<FPMathOperator>(VPInst))
    setFlags(cast<Instruction>(VPInst));

  State.set(this, VPInst, 0);
  State.addMetadata(VPInst,
                    dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
1307+
12651308
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12661309
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
12671310
VPSlotTracker &SlotTracker) const {
@@ -1271,6 +1314,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
12711314
printFlags(O);
12721315
printOperands(O, SlotTracker);
12731316
}
1317+
1318+
/// Print the recipe as "<Indent>WIDEN-VP <def> = <opcode name>", followed by
/// its flags and operands.
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN-VP ";
  printAsOperand(O, SlotTracker);

  const char *OpcodeName = Instruction::getOpcodeName(getOpcode());
  O << " = ";
  O << OpcodeName;

  printFlags(O);
  printOperands(O, SlotTracker);
}
12741326
#endif
12751327

12761328
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 60 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "VPlanTransforms.h"
1515
#include "VPRecipeBuilder.h"
16+
#include "VPlan.h"
1617
#include "VPlanAnalysis.h"
1718
#include "VPlanCFG.h"
1819
#include "VPlanDominatorTree.h"
@@ -21,6 +22,7 @@
2122
#include "llvm/ADT/PostOrderIterator.h"
2223
#include "llvm/ADT/STLExtras.h"
2324
#include "llvm/ADT/SetVector.h"
25+
#include "llvm/ADT/TypeSwitch.h"
2426
#include "llvm/Analysis/IVDescriptors.h"
2527
#include "llvm/Analysis/VectorUtils.h"
2628
#include "llvm/IR/Intrinsics.h"
@@ -1316,6 +1318,63 @@ void VPlanTransforms::addActiveLaneMask(
13161318
HeaderMask->replaceAllUsesWith(LaneMask);
13171319
}
13181320

1321+
/// Replace recipes with their EVL variants.
1322+
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1323+
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1324+
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1325+
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1326+
auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
1327+
if (!CurRecipe)
1328+
continue;
1329+
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1330+
assert(OrigMask && "Unmasked recipe when folding tail");
1331+
return HeaderMask == OrigMask ? nullptr : OrigMask;
1332+
};
1333+
1334+
VPRecipeBase *NewRecipe =
1335+
TypeSwitch<VPRecipeBase *, VPRecipeBase *>(CurRecipe)
1336+
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
1337+
VPValue *NewMask = GetNewMask(L->getMask());
1338+
return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
1339+
})
1340+
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
1341+
VPValue *NewMask = GetNewMask(S->getMask());
1342+
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
1343+
})
1344+
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
1345+
unsigned Opcode = W->getOpcode();
1346+
if (!Instruction::isBinaryOp(Opcode) &&
1347+
!Instruction::isUnaryOp(Opcode))
1348+
return nullptr;
1349+
return new VPWidenEVLRecipe(*W, EVL);
1350+
})
1351+
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
1352+
VPValue *NewMask = GetNewMask(Red->getCondOp());
1353+
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
1354+
})
1355+
.Default([&](VPRecipeBase *R) { return nullptr; });
1356+
1357+
if (!NewRecipe)
1358+
continue;
1359+
1360+
[[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
1361+
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
1362+
"New recipe must define the same number of values as the "
1363+
"original.");
1364+
assert(
1365+
NumDefVal <= 1 &&
1366+
"Only supports recipes with a single definition or without users.");
1367+
NewRecipe->insertBefore(CurRecipe);
1368+
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
1369+
VPValue *CurVPV = CurRecipe->getVPSingleValue();
1370+
CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
1371+
}
1372+
CurRecipe->eraseFromParent();
1373+
}
1374+
recursivelyDeleteDeadRecipes(HeaderMask);
1375+
}
1376+
}
1377+
13191378
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
13201379
/// replaces all uses except the canonical IV increment of
13211380
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1385,48 +1444,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
13851444
NextEVLIV->insertBefore(CanonicalIVIncrement);
13861445
EVLPhi->addOperand(NextEVLIV);
13871446

1388-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1389-
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1390-
VPRecipeBase *NewRecipe = nullptr;
1391-
auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
1392-
if (!CurRecipe)
1393-
continue;
1394-
1395-
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1396-
assert(OrigMask && "Unmasked recipe when folding tail");
1397-
return HeaderMask == OrigMask ? nullptr : OrigMask;
1398-
};
1399-
if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(CurRecipe)) {
1400-
VPValue *NewMask = GetNewMask(MemR->getMask());
1401-
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR))
1402-
NewRecipe = new VPWidenLoadEVLRecipe(*L, *VPEVL, NewMask);
1403-
else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR))
1404-
NewRecipe = new VPWidenStoreEVLRecipe(*S, *VPEVL, NewMask);
1405-
else
1406-
llvm_unreachable("unsupported recipe");
1407-
} else if (auto *RedR = dyn_cast<VPReductionRecipe>(CurRecipe)) {
1408-
NewRecipe = new VPReductionEVLRecipe(*RedR, *VPEVL,
1409-
GetNewMask(RedR->getCondOp()));
1410-
}
1447+
transformRecipestoEVLRecipes(Plan, *VPEVL);
14111448

1412-
if (NewRecipe) {
1413-
[[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
1414-
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
1415-
"New recipe must define the same number of values as the "
1416-
"original.");
1417-
assert(
1418-
NumDefVal <= 1 &&
1419-
"Only supports recipes with a single definition or without users.");
1420-
NewRecipe->insertBefore(CurRecipe);
1421-
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
1422-
VPValue *CurVPV = CurRecipe->getVPSingleValue();
1423-
CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
1424-
}
1425-
CurRecipe->eraseFromParent();
1426-
}
1427-
}
1428-
recursivelyDeleteDeadRecipes(HeaderMask);
1429-
}
14301449
// Replace all uses of VPCanonicalIVPHIRecipe by
14311450
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
14321451
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ class VPDef {
356356
VPWidenStoreEVLSC,
357357
VPWidenStoreSC,
358358
VPWidenSC,
359+
VPWidenEVLSC,
359360
VPWidenSelectSC,
360361
VPBlendSC,
361362
// START: Phi-like recipes. Need to be kept together.

0 commit comments

Comments
 (0)