@@ -1873,8 +1873,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1873
1873
1874
1874
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
1875
1875
EVT OpVT) const {
1876
- // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877
- if (!Subtarget->hasSVE ())
1876
+ // Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877
+ if (!Subtarget->hasSVEorSME ())
1878
1878
return true;
1879
1879
1880
1880
// We can only support legal predicate result types. We can use the SVE
@@ -20507,6 +20507,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
20507
20507
return SDValue();
20508
20508
}
20509
20509
20510
+ static SDValue tryCombineWhileLo(SDNode *N,
20511
+ TargetLowering::DAGCombinerInfo &DCI,
20512
+ const AArch64Subtarget *Subtarget) {
20513
+ if (DCI.isBeforeLegalize())
20514
+ return SDValue();
20515
+
20516
+ if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
20517
+ return SDValue();
20518
+
20519
+ if (!N->hasNUsesOfValue(2, 0))
20520
+ return SDValue();
20521
+
20522
+ const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
20523
+ if (HalfSize < 2)
20524
+ return SDValue();
20525
+
20526
+ auto It = N->use_begin();
20527
+ SDNode *Lo = *It++;
20528
+ SDNode *Hi = *It;
20529
+
20530
+ uint64_t OffLo, OffHi;
20531
+ if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20532
+ !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
20533
+ Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20534
+ !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
20535
+ return SDValue();
20536
+
20537
+ if (OffLo > OffHi) {
20538
+ std::swap(Lo, Hi);
20539
+ std::swap(OffLo, OffHi);
20540
+ }
20541
+
20542
+ if (OffLo != 0 || OffHi != HalfSize)
20543
+ return SDValue();
20544
+
20545
+ SelectionDAG &DAG = DCI.DAG;
20546
+ SDLoc DL(N);
20547
+ SDValue ID =
20548
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
20549
+ SDValue Idx = N->getOperand(1);
20550
+ SDValue TC = N->getOperand(2);
20551
+ if (Idx.getValueType() != MVT::i64) {
20552
+ Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
20553
+ TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
20554
+ }
20555
+ auto R =
20556
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
20557
+ {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});
20558
+
20559
+ DCI.CombineTo(Lo, R.getValue(0));
20560
+ DCI.CombineTo(Hi, R.getValue(1));
20561
+
20562
+ return SDValue(N, 0);
20563
+ }
20564
+
20510
20565
static SDValue performIntrinsicCombine(SDNode *N,
20511
20566
TargetLowering::DAGCombinerInfo &DCI,
20512
20567
const AArch64Subtarget *Subtarget) {
@@ -20837,6 +20892,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
20837
20892
case Intrinsic::aarch64_sve_ptest_last:
20838
20893
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
20839
20894
AArch64CC::LAST_ACTIVE);
20895
+ case Intrinsic::aarch64_sve_whilelo:
20896
+ return tryCombineWhileLo(N, DCI, Subtarget);
20840
20897
}
20841
20898
return SDValue();
20842
20899
}
0 commit comments