@@ -1813,8 +1813,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
 
 bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
                                                           EVT OpVT) const {
-  // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
-  if (!Subtarget->hasSVE())
+  // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
+  if (!Subtarget->hasSVEorSME())
     return true;
 
   // We can only support legal predicate result types. We can use the SVE
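Note: hasSVEorSME() amounts to a disjunction of the two feature checks, so streaming SME-only targets now also keep the 1:1 whilelo mapping instead of forcing generic expansion. A minimal standalone model of the updated gate (hypothetical helper, not the in-tree API; the real hook additionally requires a legal predicate result type, per the comment above):

static bool shouldExpandLaneMask(bool HasSVE, bool HasSME) {
  // Without SVE/SME there is no 1:1 instruction mapping for
  // @llvm.get.active.lane.mask, so request generic expansion.
  return !(HasSVE || HasSME);
}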
@@ -20004,47 +20004,98 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
   return SDValue();
 }
 
-static SDValue performIntrinsicCombine(SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       const AArch64Subtarget *Subtarget) {
+static SDValue tryCombineGetActiveLaneMask(SDNode *N,
+                                           TargetLowering::DAGCombinerInfo &DCI,
+                                           const AArch64Subtarget *Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
-  unsigned IID = getIntrinsicID(N);
-  switch (IID) {
-  default:
-    break;
-  case Intrinsic::get_active_lane_mask: {
-    SDValue Res = SDValue();
-    EVT VT = N->getValueType(0);
-    if (VT.isFixedLengthVector()) {
-      // We can use the SVE whilelo instruction to lower this intrinsic by
-      // creating the appropriate sequence of scalable vector operations and
-      // then extracting a fixed-width subvector from the scalable vector.
+  EVT VT = N->getValueType(0);
+  if (VT.isFixedLengthVector()) {
+    // We can use the SVE whilelo instruction to lower this intrinsic by
+    // creating the appropriate sequence of scalable vector operations and
+    // then extracting a fixed-width subvector from the scalable vector.
+    SDLoc DL(N);
+    SDValue ID =
+        DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
 
-      SDLoc DL(N);
-      SDValue ID =
-          DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+    EVT WhileVT =
+        EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                         ElementCount::getScalable(VT.getVectorNumElements()));
 
-      EVT WhileVT = EVT::getVectorVT(
-          *DAG.getContext(), MVT::i1,
-          ElementCount::getScalable(VT.getVectorNumElements()));
+    // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
+    EVT PromVT = getPromotedVTForPredicate(WhileVT);
 
-      // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
-      EVT PromVT = getPromotedVTForPredicate(WhileVT);
+    // Get the fixed-width equivalent of PromVT for extraction.
+    EVT ExtVT =
+        EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
+                         VT.getVectorElementCount());
 
-      // Get the fixed-width equivalent of PromVT for extraction.
-      EVT ExtVT =
-          EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
-                           VT.getVectorElementCount());
+    SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
+                              N->getOperand(1), N->getOperand(2));
+    Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
+                      DAG.getConstant(0, DL, MVT::i64));
+    Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
 
-      Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
-                        N->getOperand(1), N->getOperand(2));
-      Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
-      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
-                        DAG.getConstant(0, DL, MVT::i64));
-      Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
-    }
     return Res;
   }
+
+  if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
+    return SDValue();
+
+  if (!N->hasNUsesOfValue(2, 0))
+    return SDValue();
+
+  auto It = N->use_begin();
+  SDNode *Lo = *It++;
+  SDNode *Hi = *It;
+
+  const uint64_t HalfSize = VT.getVectorMinNumElements() / 2;
+  uint64_t OffLo, OffHi;
+  if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
+      (OffLo != 0 && OffLo != HalfSize) ||
+      Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      !isIntImmediate(Hi->getOperand(1).getNode(), OffHi) ||
+      (OffHi != 0 && OffHi != HalfSize))
+    return SDValue();
+
+  if (OffLo > OffHi) {
+    std::swap(Lo, Hi);
+    std::swap(OffLo, OffHi);
+  }
+
+  if (OffLo != 0 || OffHi != HalfSize)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue ID =
+      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
+  SDValue Idx = N->getOperand(1);
+  SDValue TC = N->getOperand(2);
+  if (Idx.getValueType() != MVT::i64) {
+    Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
+    TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
+  }
+  auto R =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
+                  {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
+
+  DCI.CombineTo(Lo, R.getValue(0));
+  DCI.CombineTo(Hi, R.getValue(1));
+
+  return SDValue(N, 0);
+}
+
+static SDValue performIntrinsicCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI,
+                                       const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned IID = getIntrinsicID(N);
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::get_active_lane_mask:
+    return tryCombineGetActiveLaneMask(N, DCI, Subtarget);
   case Intrinsic::aarch64_neon_vcvtfxs2fp:
   case Intrinsic::aarch64_neon_vcvtfxu2fp:
     return tryCombineFixedPointConvert(N, DCI, DAG);
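The new tail of tryCombineGetActiveLaneMask handles the scalable case: when the target has SVE2p1 or SME2 and the mask node's only two users extract its low and high halves, the node is rewritten to aarch64_sve_whilelo_x2, a two-result node that yields both half-masks at once and is intended to map to the predicate-pair form of whilelo. A minimal standalone sketch of the pairing check above, using hypothetical plain structs in place of SDNodes:

#include <cstdint>
#include <utility>

// Each user is modelled by its opcode kind and constant subvector index.
struct ExtractUse {
  bool IsExtractSubvector; // opcode == ISD::EXTRACT_SUBVECTOR
  uint64_t Offset;         // constant operand of the extract
};

// True iff the two users extract exactly the low half (offset 0) and the
// high half (offset MinNumElts / 2), in either enumeration order.
static bool isLoHiHalfPair(ExtractUse Lo, ExtractUse Hi, uint64_t MinNumElts) {
  const uint64_t HalfSize = MinNumElts / 2;
  if (!Lo.IsExtractSubvector || !Hi.IsExtractSubvector)
    return false;
  if (Lo.Offset > Hi.Offset)
    std::swap(Lo, Hi); // use_begin() may visit the users in either order
  return Lo.Offset == 0 && Hi.Offset == HalfSize;
}

After DCI.CombineTo redirects both extracts to the two results of the whilelo_x2 node, returning SDValue(N, 0) reports a successful combine without supplying a direct replacement for N itself, whose uses have all been rewritten away.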