Skip to content

Commit 9b4de7d

Browse files
authored
[RISCV] Lower PARTIAL_REDUCE_[S/U]MLA via zvqdotq (#140950)
The semantics of PARTIAL_REDUCE_SMLA with i32 result elements and i8 sources correspond to vqdot. Analogously, PARTIAL_REDUCE_UMLA corresponds to vqdotu. There is currently no PARTIAL_REDUCE_*MLA node that can represent vqdotsu. This patch is a starting point. We can extend this quite a bit more, and I plan to take a look at the fixed-vector lowering, the TTI hook to drive the loop vectorizer, and to try to integrate the reduction-based lowering I'd added for zvqdotq into this flow.
1 parent c432936 commit 9b4de7d

File tree

3 files changed

+356
-209
lines changed

3 files changed

+356
-209
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,6 +1571,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
15711571
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
15721572
}
15731573

1574+
// zve32x is broken for partial_reduce_umla, but let's not make it worse.
1575+
if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1576+
setPartialReduceMLAAction(MVT::nxv1i32, MVT::nxv4i8, Custom);
1577+
setPartialReduceMLAAction(MVT::nxv2i32, MVT::nxv8i8, Custom);
1578+
setPartialReduceMLAAction(MVT::nxv4i32, MVT::nxv16i8, Custom);
1579+
setPartialReduceMLAAction(MVT::nxv8i32, MVT::nxv32i8, Custom);
1580+
setPartialReduceMLAAction(MVT::nxv16i32, MVT::nxv64i8, Custom);
1581+
}
1582+
15741583
// Function alignments.
15751584
const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
15761585
setMinFunctionAlignment(FunctionAlignment);
@@ -8229,6 +8238,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
82298238
return lowerINIT_TRAMPOLINE(Op, DAG);
82308239
case ISD::ADJUST_TRAMPOLINE:
82318240
return lowerADJUST_TRAMPOLINE(Op, DAG);
8241+
case ISD::PARTIAL_REDUCE_UMLA:
8242+
case ISD::PARTIAL_REDUCE_SMLA:
8243+
return lowerPARTIAL_REDUCE_MLA(Op, DAG);
82328244
}
82338245
}
82348246

@@ -8364,6 +8376,27 @@ SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
83648376
return Op.getOperand(0);
83658377
}
83668378

8379+
/// Custom-lower an ISD::PARTIAL_REDUCE_SMLA / ISD::PARTIAL_REDUCE_UMLA node to
/// the corresponding RISCVISD::VQDOT_VL / RISCVISD::VQDOTU_VL node.
///
/// \param Op  The partial-reduce node. Operand 0 is the accumulator, and
///            operands 1 and 2 are the two multiplicand vectors.
/// \param DAG The SelectionDAG used to create the replacement node.
/// \returns A node of the same type as \p Op with operands
///          {A, B, Accum, Mask, VL}.
SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8380+
SelectionDAG &DAG) const {
8381+
// Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8382+
// TODO: There are many other sub-cases we could potentially lower, are
8383+
// any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
8384+
// TODO: PARTIAL_REDUCE_*MLA can't represent a vqdotsu currently.
8385+
SDLoc DL(Op);
8386+
MVT VT = Op.getSimpleValueType();
8387+
SDValue Accum = Op.getOperand(0);
8388+
// The accumulator must already have the result type, with i32 elements.
assert(Accum.getSimpleValueType() == VT &&
8389+
VT.getVectorElementType() == MVT::i32);
8390+
SDValue A = Op.getOperand(1);
8391+
SDValue B = Op.getOperand(2);
8392+
// Both multiplicands must share one vector type with i8 elements.
assert(A.getSimpleValueType() == B.getSimpleValueType() &&
8393+
A.getSimpleValueType().getVectorElementType() == MVT::i8);
8394+
// Only SMLA and UMLA are dispatched here, so anything that is not SMLA is
// treated as the unsigned form.
bool IsSigned = Op.getOpcode() == ISD::PARTIAL_REDUCE_SMLA;
8395+
unsigned Opc = IsSigned ? RISCVISD::VQDOT_VL : RISCVISD::VQDOTU_VL;
8396+
// Mask and VL come from getDefaultScalableVLOps for the result type VT.
auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
8397+
// Note the operand order: the accumulator moves from operand 0 of the
// incoming node to the third operand of the *_VL node.
return DAG.getNode(Opc, DL, VT, {A, B, Accum, Mask, VL});
8398+
}
8399+
83678400
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
83688401
SelectionDAG &DAG, unsigned Flags) {
83698402
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,7 @@ class RISCVTargetLowering : public TargetLowering {
552552

553553
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
554554
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
555+
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
555556

556557
bool isEligibleForTailCallOptimization(
557558
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,

0 commit comments

Comments
 (0)