Skip to content

Commit b63259a

Browse files
committed
[VP][EVL] Support select instruction with EVL-vectorization
1 parent 89622e9 commit b63259a

File tree

7 files changed

+126
-8
lines changed

7 files changed

+126
-8
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
889889
case VPRecipeBase::VPWidenSC:
890890
case VPRecipeBase::VPWidenEVLSC:
891891
case VPRecipeBase::VPWidenSelectSC:
892+
case VPRecipeBase::VPWidenSelectEVLSC:
892893
case VPRecipeBase::VPBlendSC:
893894
case VPRecipeBase::VPPredInstPHISC:
894895
case VPRecipeBase::VPCanonicalIVPHISC:
@@ -1666,10 +1667,17 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
16661667

16671668
/// A recipe for widening select instructions.
16681669
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
1670+
1671+
protected:
1672+
// Protected constructor that takes the recipe ID \p VPDefOpcode explicitly
// instead of hard-coding VPWidenSelectSC. It forwards the ID, \p Operands,
// the select \p I and the debug location of \p I to VPSingleDefRecipe.
template <typename IterT>
1673+
VPWidenSelectRecipe(unsigned VPDefOpcode, SelectInst &I,
1674+
iterator_range<IterT> Operands)
1675+
: VPSingleDefRecipe(VPDefOpcode, Operands, &I, I.getDebugLoc()) {}
1676+
1677+
public:
16691678
template <typename IterT>
16701679
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
1671-
: VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1672-
I.getDebugLoc()) {}
1680+
: VPWidenSelectRecipe(VPDef::VPWidenSelectSC, I, Operands) {}
16731681

16741682
~VPWidenSelectRecipe() override = default;
16751683

@@ -1678,7 +1686,15 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
16781686
operands());
16791687
}
16801688

1681-
VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1689+
// Accepts both the plain select recipe ID and the EVL select recipe ID, so a
// recipe carrying VPWidenSelectEVLSC is also treated as a VPWidenSelectRecipe.
static inline bool classof(const VPRecipeBase *R) {
1690+
return R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
1691+
R->getVPDefID() == VPRecipeBase::VPWidenSelectEVLSC;
1692+
}
1693+
1694+
// A VPUser matches only when it is a VPRecipeBase that the recipe-based
// classof overload accepts; any other user yields false.
static inline bool classof(const VPUser *U) {
1695+
auto *R = dyn_cast<VPRecipeBase>(U);
1696+
return R && classof(R);
1697+
}
16821698

16831699
/// Produce a widened version of the select instruction.
16841700
void execute(VPTransformState &State) override;
@@ -1698,6 +1714,52 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
16981714
}
16991715
};
17001716

1717+
// A recipe for widening select instructions with vector-predication intrinsics
1718+
// with explicit vector length (EVL).
// The EVL is stored as the last operand of the recipe, after the operands
// passed in for the select itself.
1719+
struct VPWidenSelectEVLRecipe : public VPWidenSelectRecipe {
1720+
1721+
// Creates the recipe for select \p I with \p Operands under the
// VPWidenSelectEVLSC ID, then appends \p EVL as the trailing operand.
template <typename IterT>
1722+
VPWidenSelectEVLRecipe(SelectInst &I, iterator_range<IterT> Operands,
1723+
VPValue &EVL)
1724+
: VPWidenSelectRecipe(VPDef::VPWidenSelectEVLSC, I, Operands) {
1725+
addOperand(&EVL);
1726+
}
1727+
1728+
// Creates the EVL recipe from an existing select recipe \p W, reusing its
// underlying instruction (cast to SelectInst) and its operands, plus \p EVL.
VPWidenSelectEVLRecipe(VPWidenSelectRecipe &W, VPValue &EVL)
1729+
: VPWidenSelectEVLRecipe(*cast<SelectInst>(W.getUnderlyingInstr()),
1730+
W.operands(), EVL) {}
1731+
1732+
~VPWidenSelectEVLRecipe() override = default;
1733+
1734+
// Cloning is not supported for this recipe; reaching this function is
// reported through llvm_unreachable.
VPWidenSelectEVLRecipe *clone() final {
1735+
llvm_unreachable("VPWidenSelectEVLRecipe cannot be cloned");
1736+
return nullptr;
1737+
}
1738+
1739+
// NOTE(review): presumably generates the classof overloads that match only
// the VPWidenSelectEVLSC ID; the macro definition is not visible here.
VP_CLASSOF_IMPL(VPDef::VPWidenSelectEVLSC)
1740+
1741+
// Accessors for the EVL, which is always the last operand of the recipe.
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1742+
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1743+
1744+
/// Produce a vp-intrinsic version of the select instruction.
1745+
void execute(VPTransformState &State) final;
1746+
1747+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1748+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1749+
assert(is_contained(operands(), Op) &&
1750+
"Op must be an operand of the recipe");
1751+
// EVL in this recipe is always the last operand, thus any use before means
1752+
// the VPValue should be vectorized.
1753+
return getEVL() == Op;
1754+
}
1755+
1756+
// Printing is only declared when assertions are enabled or LLVM_ENABLE_DUMP
// is defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1757+
/// Print the recipe.
1758+
void print(raw_ostream &O, const Twine &Indent,
1759+
VPSlotTracker &SlotTracker) const final;
1760+
#endif
1761+
};
1762+
17011763
/// A recipe for handling GEP instructions.
17021764
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
17031765
bool isPointerLoopInvariant() const {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
9494
case VPWidenPHISC:
9595
case VPWidenSC:
9696
case VPWidenEVLSC:
97-
case VPWidenSelectSC: {
97+
case VPWidenSelectSC:
98+
case VPWidenSelectEVLSC: {
9899
const Instruction *I =
99100
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
100101
(void)I;
@@ -135,7 +136,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
135136
case VPWidenPHISC:
136137
case VPWidenSC:
137138
case VPWidenEVLSC:
138-
case VPWidenSelectSC: {
139+
case VPWidenSelectSC:
140+
case VPWidenSelectEVLSC: {
139141
const Instruction *I =
140142
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
141143
(void)I;
@@ -172,7 +174,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
172174
case VPWidenPointerInductionSC:
173175
case VPWidenSC:
174176
case VPWidenEVLSC:
175-
case VPWidenSelectSC: {
177+
case VPWidenSelectSC:
178+
case VPWidenSelectEVLSC: {
176179
const Instruction *I =
177180
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
178181
(void)I;
@@ -1038,6 +1041,21 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
10381041
getOperand(2)->printAsOperand(O, SlotTracker);
10391042
O << (isInvariantCond() ? " (condition is loop invariant)" : "");
10401043
}
1044+
1045+
// Prints the recipe in the form
//   WIDEN-SELECT <def> = vp.select <op0>, <op1>, <op2>, <op3>
// and appends " (condition is loop invariant)" when isInvariantCond() holds.
void VPWidenSelectEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1046+
VPSlotTracker &SlotTracker) const {
1047+
O << Indent << "WIDEN-SELECT ";
1048+
printAsOperand(O, SlotTracker);
1049+
O << " = vp.select ";
1050+
getOperand(0)->printAsOperand(O, SlotTracker);
1051+
O << ", ";
1052+
getOperand(1)->printAsOperand(O, SlotTracker);
1053+
O << ", ";
1054+
getOperand(2)->printAsOperand(O, SlotTracker);
1055+
O << ", ";
1056+
// NOTE(review): operand 3 is presumably the EVL, i.e. the recipe is assumed
// to carry exactly three select operands before it — confirm.
getOperand(3)->printAsOperand(O, SlotTracker);
1057+
O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1058+
}
10411059
#endif
10421060

10431061
void VPWidenSelectRecipe::execute(VPTransformState &State) {
@@ -1058,6 +1076,35 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
10581076
State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
10591077
}
10601078

1079+
// Emits the select through a VectorBuilder configured with the recipe's EVL
// and records the result as this recipe's value for part 0. Asserts that the
// unroll factor is 1.
void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
1080+
State.setDebugLocFrom(getDebugLoc());
1081+
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
1082+
"explicit vector length.");
1083+
1084+
// The EVL is requested as a scalar value (NeedsScalar=true) and handed to
// the VectorBuilder that creates the instruction below.
Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
1085+
IRBuilderBase &BuilderIR = State.Builder;
1086+
VectorBuilder Builder(BuilderIR);
1087+
Builder.setEVL(EVLArg);
1088+
// The condition can be loop invariant but still defined inside the
1089+
// loop. This means that we can't just use the original 'cond' value.
1090+
// We have to take the 'vectorized' value and pick the first lane.
1091+
// Instcombine will make this a no-op.
1092+
auto *InvarCond =
1093+
isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
1094+
1095+
// A condition that is not of vector type is splatted to State.VF elements
// before being passed to the builder.
Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
1096+
if (!isa<VectorType>(Cond->getType()))
1097+
Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
1098+
1099+
// Operands 1 and 2 are the two values selected between; the type of the
// first one is used as the result type of the created instruction.
Value *Op0 = State.get(getOperand(1), 0);
1100+
Value *Op1 = State.get(getOperand(2), 0);
1101+
Value *VPInst = Builder.createVectorInstruction(
1102+
Instruction::Select, Op0->getType(), {Cond, Op0, Op1}, "vp.select");
1103+
State.set(this, VPInst, 0);
1104+
// The underlying value is passed to addMetadata as an Instruction, or as
// null when there is none or it is not an Instruction.
State.addMetadata(VPInst,
1105+
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1106+
}
1107+
10611108
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
10621109
const FastMathFlags &FMF) {
10631110
AllowReassoc = FMF.allowReassoc();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13791379
return nullptr;
13801380
return new VPWidenEVLRecipe(*W, EVL);
13811381
})
1382+
.Case<VPWidenSelectRecipe>(
1383+
[&](VPWidenSelectRecipe *W) -> VPRecipeBase * {
1384+
return new VPWidenSelectEVLRecipe(*W, EVL);
1385+
})
13821386
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
13831387
VPValue *NewMask = GetNewMask(Red->getCondOp());
13841388
return new VPReductionEVLRecipe(*Red, EVL, NewMask);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ class VPDef {
357357
VPWidenSC,
358358
VPWidenEVLSC,
359359
VPWidenSelectSC,
360+
VPWidenSelectEVLSC,
360361
VPBlendSC,
361362
// START: Phi-like recipes. Need to be kept together.
362363
VPWidenPHISC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
148148
return VerifyEVLUse(
149149
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
150150
})
151+
.Case<VPWidenSelectEVLRecipe>(
152+
[&](const VPWidenSelectEVLRecipe *S) {
153+
return VerifyEVLUse(*S, 3);
154+
})
151155
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
152156
return VerifyEVLUse(*R, 2);
153157
})

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
7070
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
7171
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
7272
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
73-
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
73+
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
7474
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
7575
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
7676
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
3333
; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
3434
; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
3535
; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
36-
; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
36+
; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5>
3737
; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
3838
; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
3939
; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15>

0 commit comments

Comments
 (0)