@@ -1834,8 +1834,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1834
1834
1835
1835
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
1836
1836
EVT OpVT) const {
1837
- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838
- if (!Subtarget->hasSVE ())
1837
+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838
+ if (!Subtarget->hasSVEorSME ())
1839
1839
return true;
1840
1840
1841
1841
// We can only support legal predicate result types. We can use the SVE
@@ -20535,6 +20535,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
20535
20535
return SDValue();
20536
20536
}
20537
20537
20538
+ static SDValue tryCombineWhileLo(SDNode *N,
20539
+ TargetLowering::DAGCombinerInfo &DCI,
20540
+ const AArch64Subtarget *Subtarget) {
20541
+ if (DCI.isBeforeLegalize())
20542
+ return SDValue();
20543
+
20544
+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20545
+ return SDValue();
20546
+
20547
+ if (!N->hasNUsesOfValue(2, 0))
20548
+ return SDValue();
20549
+
20550
+ const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
20551
+ if (HalfSize < 2)
20552
+ return SDValue();
20553
+
20554
+ auto It = N->use_begin();
20555
+ SDNode *Lo = *It++;
20556
+ SDNode *Hi = *It;
20557
+
20558
+ uint64_t OffLo, OffHi;
20559
+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20560
+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20561
+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20562
+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
20563
+ return SDValue();
20564
+
20565
+ if (OffLo > OffHi) {
20566
+ std::swap(Lo, Hi);
20567
+ std::swap(OffLo, OffHi);
20568
+ }
20569
+
20570
+ if (OffLo != 0 || OffHi != HalfSize)
20571
+ return SDValue();
20572
+
20573
+ SelectionDAG &DAG = DCI.DAG;
20574
+ SDLoc DL(N);
20575
+ SDValue ID =
20576
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20577
+ SDValue Idx = N->getOperand(1);
20578
+ SDValue TC = N->getOperand(2);
20579
+ if (Idx.getValueType() != MVT::i64) {
20580
+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20581
+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20582
+ }
20583
+ auto R =
20584
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20585
+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20586
+
20587
+ DCI.CombineTo(Lo, R.getValue(0));
20588
+ DCI.CombineTo(Hi, R.getValue(1));
20589
+
20590
+ return SDValue(N, 0);
20591
+ }
20592
+
20538
20593
static SDValue performIntrinsicCombine(SDNode *N,
20539
20594
TargetLowering::DAGCombinerInfo &DCI,
20540
20595
const AArch64Subtarget *Subtarget) {
@@ -20832,6 +20887,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
20832
20887
case Intrinsic::aarch64_sve_ptest_last:
20833
20888
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
20834
20889
AArch64CC::LAST_ACTIVE);
20890
+ case Intrinsic::aarch64_sve_whilelo:
20891
+ return tryCombineWhileLo(N, DCI, Subtarget);
20835
20892
}
20836
20893
return SDValue();
20837
20894
}
0 commit comments