Commit 00baa1a

[DAG][RISCV] Use vp_reduce_* when widening illegal types for reductions (#105455)
This allows the use of a single wider operation with a restricted EVL, instead of padding the vector with the neutral element.

For RISCV specifically, it's worth noting that an alternate padded lowering is available when VL is one less than a power of two and LMUL <= m1: we could slide the vector operand up by one and insert the padding via a vslide1up. We don't currently pattern match this, but we could. That form would arguably be better iff the surrounding code wanted VL=4; this patch will force a VL toggle in that case instead. Basically, it comes down to whether we think odd-sized vectors are going to appear clustered with odd-sized vector operations, or mixed in with larger power-of-two operations.

Note that there is a potential downside of using vp nodes: we lose any generic DAG combines which might have applied to the widened form.
1 parent 41dcdfb commit 00baa1a
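To make the trade-off concrete, here is a small, self-contained C++ sketch, not LLVM code: it models an illegal 3-element integer add reduction that has been widened to 4 lanes, and shows that reducing only the first EVL = 3 lanes (the new vp_reduce_* path) gives the same answer as overwriting the extra lane with the neutral element and reducing all 4 lanes (the old padding path). The buffer contents and lane count are invented for illustration.

#include <array>
#include <cassert>
#include <cstdio>

int main() {
  // Widened 4-lane buffer holding an "illegal" 3-element vector; the last
  // lane is whatever happened to be there after widening.
  std::array<int, 4> Wide = {5, -2, 7, /*junk lane*/ 12345};

  // Old lowering: overwrite the padding lane with the neutral element for
  // add (0), then reduce all 4 lanes.
  std::array<int, 4> Padded = Wide;
  Padded[3] = 0;
  int PaddedSum = 0;
  for (int V : Padded)
    PaddedSum += V;

  // New lowering: leave the extra lane alone and reduce only the first
  // EVL = 3 lanes, the way vp.reduce.add with a restricted EVL would.
  const int EVL = 3;
  int VPSum = 0;
  for (int I = 0; I < EVL; ++I)
    VPSum += Wide[I];

  assert(PaddedSum == VPSum);
  std::printf("padded = %d, evl-restricted = %d\n", PaddedSum, VPSum);
  return 0;
}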

File tree

6 files changed: +158 −202 lines changed

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 15 additions & 14 deletions
@@ -651,63 +651,64 @@ END_REGISTER_VP(vp_gather, VP_GATHER)
 #error \
     "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
 #endif
-#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN)                       \
+#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN, SDOPC)                \
   BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, 1)                                       \
   VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN)                                     \
+  VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC)                                          \
   VP_PROPERTY_REDUCTION(0, 1)                                                  \
   END_REGISTER_VP(VPID, VPSD)
 
 // llvm.vp.reduce.add(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD,
-                             vector_reduce_add)
+                             vector_reduce_add, VECREDUCE_ADD)
 
 // llvm.vp.reduce.mul(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_mul, VP_REDUCE_MUL,
-                             vector_reduce_mul)
+                             vector_reduce_mul, VECREDUCE_MUL)
 
 // llvm.vp.reduce.and(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_and, VP_REDUCE_AND,
-                             vector_reduce_and)
+                             vector_reduce_and, VECREDUCE_AND)
 
 // llvm.vp.reduce.or(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_or, VP_REDUCE_OR,
-                             vector_reduce_or)
+                             vector_reduce_or, VECREDUCE_OR)
 
 // llvm.vp.reduce.xor(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_xor, VP_REDUCE_XOR,
-                             vector_reduce_xor)
+                             vector_reduce_xor, VECREDUCE_XOR)
 
 // llvm.vp.reduce.smax(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_smax, VP_REDUCE_SMAX,
-                             vector_reduce_smax)
+                             vector_reduce_smax, VECREDUCE_SMAX)
 
 // llvm.vp.reduce.smin(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_smin, VP_REDUCE_SMIN,
-                             vector_reduce_smin)
+                             vector_reduce_smin, VECREDUCE_SMIN)
 
 // llvm.vp.reduce.umax(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_umax, VP_REDUCE_UMAX,
-                             vector_reduce_umax)
+                             vector_reduce_umax, VECREDUCE_UMAX)
 
 // llvm.vp.reduce.umin(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_umin, VP_REDUCE_UMIN,
-                             vector_reduce_umin)
+                             vector_reduce_umin, VECREDUCE_UMIN)
 
 // llvm.vp.reduce.fmax(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX,
-                             vector_reduce_fmax)
+                             vector_reduce_fmax, VECREDUCE_FMAX)
 
 // llvm.vp.reduce.fmin(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
-                             vector_reduce_fmin)
+                             vector_reduce_fmin, VECREDUCE_FMIN)
 
 // llvm.vp.reduce.fmaximum(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmaximum, VP_REDUCE_FMAXIMUM,
-                             vector_reduce_fmaximum)
+                             vector_reduce_fmaximum, VECREDUCE_FMAXIMUM)
 
 // llvm.vp.reduce.fminimum(start,x,mask,vlen)
 HELPER_REGISTER_REDUCTION_VP(vp_reduce_fminimum, VP_REDUCE_FMINIMUM,
-                             vector_reduce_fminimum)
+                             vector_reduce_fminimum, VECREDUCE_FMINIMUM)
 
 #undef HELPER_REGISTER_REDUCTION_VP
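The change above adds an SDOPC column to HELPER_REGISTER_REDUCTION_VP so that each VP reduction also records its base VECREDUCE_* SDNode opcode via VP_PROPERTY_FUNCTIONAL_SDOPC; that mapping is what the legalizer change below consults through ISD::getVPForBaseOpcode. As a rough, self-contained sketch of the X-macro technique (the names REDUCTION_TABLE, Opcode, and getVPForBase are invented for this illustration and are not LLVM's), the same table can be re-expanded per use, here to build a base-opcode-to-VP-opcode lookup:

#include <cstdio>

// Invented stand-ins for ISD opcodes, for illustration only.
enum Opcode { VECREDUCE_ADD, VECREDUCE_SMAX, VP_REDUCE_ADD, VP_REDUCE_SMAX, NO_VP };

// One row per reduction, mirroring the VPID / VPSD / SDOPC columns of the
// real HELPER_REGISTER_REDUCTION_VP macro (the INTRIN column is omitted here).
#define REDUCTION_TABLE(X)                                                     \
  X(vp_reduce_add, VP_REDUCE_ADD, VECREDUCE_ADD)                               \
  X(vp_reduce_smax, VP_REDUCE_SMAX, VECREDUCE_SMAX)

// Expand the table into a "base VECREDUCE opcode -> VP opcode" lookup,
// roughly the role ISD::getVPForBaseOpcode plays for the real table.
Opcode getVPForBase(Opcode Base) {
#define MAP_CASE(VPID, VPSD, SDOPC)                                            \
  if (Base == SDOPC)                                                           \
    return VPSD;
  REDUCTION_TABLE(MAP_CASE)
#undef MAP_CASE
  return NO_VP;
}

int main() {
  std::printf("%d\n", getVPForBase(VECREDUCE_ADD) == VP_REDUCE_ADD);   // prints 1
  std::printf("%d\n", getVPForBase(VECREDUCE_SMAX) == VP_REDUCE_SMAX); // prints 1
  return 0;
}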

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 41 additions & 2 deletions
@@ -7271,9 +7271,29 @@ SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
   return DAG.getBuildVector(VT, dl, Scalars);
 }
 
+static unsigned getExtendForIntVecReduction(unsigned Opc) {
+  switch (Opc) {
+  default:
+    llvm_unreachable("Expected integer vector reduction");
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+    return ISD::ANY_EXTEND;
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+    return ISD::SIGN_EXTEND;
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+    return ISD::ZERO_EXTEND;
+  }
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
   SDLoc dl(N);
   SDValue Op = GetWidenedVector(N->getOperand(0));
+  EVT VT = N->getValueType(0);
   EVT OrigVT = N->getOperand(0).getValueType();
   EVT WideVT = Op.getValueType();
   EVT ElemVT = OrigVT.getVectorElementType();
@@ -7288,6 +7308,25 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
   unsigned OrigElts = OrigVT.getVectorMinNumElements();
   unsigned WideElts = WideVT.getVectorMinNumElements();
 
+  // Generate a vp.reduce_op if it is custom/legal for the target. This avoids
+  // needing to pad the source vector, because the inactive lanes can simply be
+  // disabled and not contribute to the result.
+  // TODO: VECREDUCE_FADD, VECREDUCE_FMUL aren't currently mapped correctly,
+  // and thus don't take this path.
+  if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
+      VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
+    SDValue Start = NeutralElem;
+    if (VT.isInteger())
+      Start = DAG.getNode(getExtendForIntVecReduction(Opc), dl, VT, Start);
+    assert(Start.getValueType() == VT);
+    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                      WideVT.getVectorElementCount());
+    SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
+    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+                                      OrigVT.getVectorElementCount());
+    return DAG.getNode(*VPOpcode, dl, VT, {Start, Op, Mask, EVL}, Flags);
+  }
+
   if (WideVT.isScalableVector()) {
     unsigned GCD = std::gcd(OrigElts, WideElts);
     EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
@@ -7296,14 +7335,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
     for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
       Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
                        DAG.getVectorIdxConstant(Idx, dl));
-    return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+    return DAG.getNode(Opc, dl, VT, Op, Flags);
   }
 
   for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
     Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
                      DAG.getVectorIdxConstant(Idx, dl));
 
-  return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+  return DAG.getNode(Opc, dl, VT, Op, Flags);
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
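One detail in the new vp_reduce path worth spelling out: when the reduction result type VT is a wider integer than the vector element type, the neutral start value has to be widened with the extension that matches the reduction's implicit lane extension, which is what getExtendForIntVecReduction encodes (sign extend for smax/smin, zero extend for umax/umin, any extend for the bitwise and arithmetic ops). Below is a small, self-contained C++ sketch of why that pairing matters, assuming the usual SelectionDAG convention that an integer VECREDUCE with a wider result treats lanes as zero- or sign-extended according to the operation; the i8 values and variable names are invented for illustration, not LLVM code.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // umin over i8: the neutral element is 0xFF and lanes are treated as
  // zero-extended, so the start value must be zero-extended too. A
  // sign-extended start (-1) would clobber every lane.
  uint8_t ULane = 42;
  uint32_t ULaneWide = ULane;                             // zext lane -> 42
  uint32_t GoodUMin = uint8_t{0xFF};                      // zext start -> 255
  int32_t BadUMin = static_cast<int8_t>(0xFF);            // sext start -> -1
  assert(std::min(GoodUMin, ULaneWide) == ULaneWide);     // identity holds
  assert(std::min<int32_t>(BadUMin, ULaneWide) !=
         static_cast<int32_t>(ULaneWide));                // identity broken

  // smax over i8: the neutral element is 0x80 (-128) and lanes are treated
  // as sign-extended, so the start must be sign-extended. A zero-extended
  // start (128) would dominate every lane.
  int8_t SLane = -5;
  int32_t SLaneWide = SLane;                              // sext lane -> -5
  int32_t GoodSMax = static_cast<int8_t>(0x80);           // sext start -> -128
  int32_t BadSMax = 0x80;                                 // zext start -> 128
  assert(std::max(GoodSMax, SLaneWide) == SLaneWide);     // identity holds
  assert(std::max(BadSMax, SLaneWide) != SLaneWide);      // identity broken
  return 0;
}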
