Skip to content

Commit ac63959

Browse files
author
Cameron McInally
committed
[SVE] Lower fixed length vXi8/vXi16 SDIV to scalable
SVE has no nxv16i8/nxv8i16 SDIV instructions, so these fixed-length operations must be promoted to nxv4i32. Differential Revision: https://reviews.llvm.org/D86114
1 parent ffadd30 commit ac63959

File tree

3 files changed

+400
-9
lines changed

3 files changed

+400
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
997997
// These operations are not supported on NEON but SVE can do them.
998998
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
999999
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1000+
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1001+
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1002+
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1003+
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
10001004
setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
10011005
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
10021006
setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
@@ -1118,6 +1122,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
11181122
setOperationAction(ISD::LOAD, VT, Custom);
11191123
setOperationAction(ISD::MUL, VT, Custom);
11201124
setOperationAction(ISD::OR, VT, Custom);
1125+
setOperationAction(ISD::SDIV, VT, Custom);
11211126
setOperationAction(ISD::SETCC, VT, Custom);
11221127
setOperationAction(ISD::SHL, VT, Custom);
11231128
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -1134,10 +1139,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
11341139
setOperationAction(ISD::UMIN, VT, Custom);
11351140
setOperationAction(ISD::XOR, VT, Custom);
11361141
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1137-
1138-
if (VT.getVectorElementType() == MVT::i32 ||
1139-
VT.getVectorElementType() == MVT::i64)
1140-
setOperationAction(ISD::SDIV, VT, Custom);
11411142
}
11421143

11431144
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
@@ -8934,14 +8935,15 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
89348935

89358936
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
89368937
EVT VT = Op.getValueType();
8938+
8939+
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
8940+
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
8941+
8942+
assert(VT.isScalableVector() && "Expected a scalable vector.");
8943+
89378944
bool Signed = Op.getOpcode() == ISD::SDIV;
89388945
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
89398946

8940-
if (useSVEForFixedLengthVectorVT(Op.getValueType(), /*OverrideNEON=*/true) &&
8941-
(VT.getVectorElementType() == MVT::i32 ||
8942-
VT.getVectorElementType() == MVT::i64))
8943-
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
8944-
89458947
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
89468948
return LowerToPredicatedOp(Op, DAG, PredOpcode);
89478949

@@ -15349,6 +15351,56 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
1534915351
Store->isTruncatingStore());
1535015352
}
1535115353

15354+
// Lower a fixed-length vector integer divide using SVE.
//
// Signedness is taken from the opcode: ISD::SDIV selects the signed predicated
// opcode and the signed unpack nodes; any other opcode selects the unsigned
// variants.
//
// Vectors with i32/i64 elements are forwarded directly to LowerToPredicatedOp.
// For all other element types, both operands are unpacked into low/high
// halves with a widened element type, each half is divided with the original
// opcode on the widened fixed-length type, and the two partial results are
// recombined with UZP1 into the original container type.
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
15355+
SDValue Op, SelectionDAG &DAG) const {
15356+
SDLoc dl(Op);
15357+
EVT VT = Op.getValueType();
15358+
EVT EltVT = VT.getVectorElementType();
15359+
15360+
bool Signed = Op.getOpcode() == ISD::SDIV;
15361+
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
15362+
15363+
// Scalable vector i32/i64 DIV is supported.
15364+
if (EltVT == MVT::i32 || EltVT == MVT::i64)
15365+
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
15366+
15367+
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
// NOTE(review): this function widens the element type only once (half the
// element count, via widenIntegerVectorElementType). Presumably i8 inputs
// reach i32 because the widened DIV nodes created below are lowered again
// -- confirm.
15368+
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
15369+
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
15370+
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
15371+
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
15372+
15373+
// Convert the operands to scalable vectors.
15374+
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
15375+
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
15376+
15377+
// Extend the scalable operands.
// Signed divides use the SUNPK* nodes and unsigned divides the UUNPK* nodes;
// the LO/HI variants produce the two halves of each operand separately.
15378+
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
15379+
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
15380+
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
15381+
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
15382+
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
15383+
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
15384+
15385+
// Convert back to fixed vectors so the DIV can be further lowered.
// The new DIV nodes reuse the original opcode on the widened fixed-length
// type.
15386+
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
15387+
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
15388+
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
15389+
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
15390+
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
15391+
Op0Lo, Op1Lo);
15392+
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
15393+
Op0Hi, Op1Hi);
15394+
15395+
// Convert again to scalable vectors to truncate.
// NOTE(review): UZP1 presumably keeps the even-numbered narrow elements of
// the two widened results, which is what narrows them back to the original
// element type -- confirm against the UZP1 definition.
15396+
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
15397+
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
15398+
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
15399+
ResultLo, ResultHi);
15400+
15401+
// Hand the result back in the caller's original fixed-length type.
return convertFromScalableVector(DAG, VT, ScalableResult);
15402+
}
15403+
1535215404
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
1535315405
SDValue Op, SelectionDAG &DAG) const {
1535415406
EVT VT = Op.getValueType();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,8 @@ class AArch64TargetLowering : public TargetLowering {
904904
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
905905
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
906906

907+
// Lowers a fixed-length vector integer divide node (Op) using SVE and returns
// the replacement value.
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
908+
SelectionDAG &DAG) const;
907909
SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
908910
SelectionDAG &DAG) const;
909911
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)