@@ -1830,8 +1830,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1830
1830
1831
1831
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
1832
1832
EVT OpVT) const {
1833
- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1834
- if (!Subtarget->hasSVE ())
1833
+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1834
+ if (!Subtarget->hasSVEorSME ())
1835
1835
return true;
1836
1836
1837
1837
// We can only support legal predicate result types. We can use the SVE
@@ -20522,6 +20522,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
20522
20522
return SDValue();
20523
20523
}
20524
20524
20525
+ static SDValue tryCombineWhileLo(SDNode *N,
20526
+ TargetLowering::DAGCombinerInfo &DCI,
20527
+ const AArch64Subtarget *Subtarget) {
20528
+ if (DCI.isBeforeLegalize())
20529
+ return SDValue();
20530
+
20531
+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20532
+ return SDValue();
20533
+
20534
+ if (!N->hasNUsesOfValue(2, 0))
20535
+ return SDValue();
20536
+
20537
+ const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
20538
+ if (HalfSize < 2)
20539
+ return SDValue();
20540
+
20541
+ auto It = N->use_begin();
20542
+ SDNode *Lo = *It++;
20543
+ SDNode *Hi = *It;
20544
+
20545
+ uint64_t OffLo, OffHi;
20546
+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20547
+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20548
+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20549
+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
20550
+ return SDValue();
20551
+
20552
+ if (OffLo > OffHi) {
20553
+ std::swap(Lo, Hi);
20554
+ std::swap(OffLo, OffHi);
20555
+ }
20556
+
20557
+ if (OffLo != 0 || OffHi != HalfSize)
20558
+ return SDValue();
20559
+
20560
+ SelectionDAG &DAG = DCI.DAG;
20561
+ SDLoc DL(N);
20562
+ SDValue ID =
20563
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20564
+ SDValue Idx = N->getOperand(1);
20565
+ SDValue TC = N->getOperand(2);
20566
+ if (Idx.getValueType() != MVT::i64) {
20567
+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20568
+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20569
+ }
20570
+ auto R =
20571
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20572
+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20573
+
20574
+ DCI.CombineTo(Lo, R.getValue(0));
20575
+ DCI.CombineTo(Hi, R.getValue(1));
20576
+
20577
+ return SDValue(N, 0);
20578
+ }
20579
+
20525
20580
static SDValue performIntrinsicCombine(SDNode *N,
20526
20581
TargetLowering::DAGCombinerInfo &DCI,
20527
20582
const AArch64Subtarget *Subtarget) {
@@ -20852,6 +20907,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
20852
20907
case Intrinsic::aarch64_sve_ptest_last:
20853
20908
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
20854
20909
AArch64CC::LAST_ACTIVE);
20910
+ case Intrinsic::aarch64_sve_whilelo:
20911
+ return tryCombineWhileLo(N, DCI, Subtarget);
20855
20912
}
20856
20913
return SDValue();
20857
20914
}
0 commit comments