@@ -997,6 +997,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       // These operations are not supported on NEON but SVE can do them.
       setOperationAction(ISD::MUL, MVT::v1i64, Custom);
       setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+      setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+      setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
+      setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+      setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
       setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
       setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
       setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
@@ -1118,6 +1122,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::LOAD, VT, Custom);
   setOperationAction(ISD::MUL, VT, Custom);
   setOperationAction(ISD::OR, VT, Custom);
+  setOperationAction(ISD::SDIV, VT, Custom);
   setOperationAction(ISD::SETCC, VT, Custom);
   setOperationAction(ISD::SHL, VT, Custom);
   setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -1134,10 +1139,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::UMIN, VT, Custom);
   setOperationAction(ISD::XOR, VT, Custom);
   setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
-
-  if (VT.getVectorElementType() == MVT::i32 ||
-      VT.getVectorElementType() == MVT::i64)
-    setOperationAction(ISD::SDIV, VT, Custom);
 }

 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
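
For context, marking these SDIV nodes Custom sends them through the target's LowerOperation hook during SelectionDAG legalization, and in this file that hook hands integer divides to LowerDIV, which the next hunk updates. A rough sketch of that dispatch, paraphrased from the same file and not part of this patch (other opcode cases elided):

// Paraphrased sketch (not part of this diff): Custom-marked nodes reach
// LowerOperation, which routes ISD::SDIV/ISD::UDIV to LowerDIV.
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operand");
  // ... many other opcodes handled here ...
  case ISD::SDIV:
  case ISD::UDIV:
    return LowerDIV(Op, DAG);
  }
}
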
@@ -8934,14 +8935,15 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,

 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
+
+  if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
+    return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
+
+  assert(VT.isScalableVector() && "Expected a scalable vector.");
+
   bool Signed = Op.getOpcode() == ISD::SDIV;
   unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;

-  if (useSVEForFixedLengthVectorVT(Op.getValueType(), /*OverrideNEON=*/true) &&
-      (VT.getVectorElementType() == MVT::i32 ||
-       VT.getVectorElementType() == MVT::i64))
-    return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
-
   if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
     return LowerToPredicatedOp(Op, DAG, PredOpcode);

@@ -15349,6 +15351,56 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
                             Store->isTruncatingStore());
 }

+SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
+    SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+
+  bool Signed = Op.getOpcode() == ISD::SDIV;
+  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+
+  // Scalable vector i32/i64 DIV is supported.
+  if (EltVT == MVT::i32 || EltVT == MVT::i64)
+    return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
+
+  // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
+  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+  EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
+  EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
+
+  // Convert the operands to scalable vectors.
+  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+
+  // Extend the scalable operands.
+  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
+  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
+  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
+  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
+  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
+
+  // Convert back to fixed vectors so the DIV can be further lowered.
+  Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
+  Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
+  Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
+  Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
+  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+                                 Op0Lo, Op1Lo);
+  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+                                 Op0Hi, Op1Hi);
+
+  // Convert again to scalable vectors to truncate.
+  ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
+  ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
+  SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
+                                       ResultLo, ResultHi);
+
+  return convertFromScalableVector(DAG, VT, ScalableResult);
+}
+
 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
     SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
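
For reference, a minimal standalone sketch of the element-level semantics the new lowering implements for a v8i8 SDIV: each operand element is sign-extended to i32 (SUNPKLO/SUNPKHI), divided with the predicated 32-bit divide, and the results are narrowed back with UZP1. The function name and the use of std::array below are illustrative only and not part of the patch.

#include <array>
#include <cstdint>

// Scalar model of the widen-divide-narrow strategy used above for v8i8 SDIV.
// The widening mirrors SUNPKLO/SUNPKHI, the i32 division mirrors SDIV_PRED,
// and the narrowing cast mirrors the final UZP1.
std::array<int8_t, 8> sdiv_v8i8_model(const std::array<int8_t, 8> &A,
                                      const std::array<int8_t, 8> &B) {
  std::array<int8_t, 8> R{};
  for (int I = 0; I < 8; ++I) {
    int32_t Lhs = static_cast<int32_t>(A[I]); // sign-extend element to i32
    int32_t Rhs = static_cast<int32_t>(B[I]);
    R[I] = static_cast<int8_t>(Lhs / Rhs);    // divide in i32, narrow to i8
  }
  return R;
}

An unsigned divide (UDIV) follows the same shape with zero-extension (UUNPKLO/UUNPKHI) instead of sign-extension.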