Skip to content

Commit 3373074

Browse files
[AArch64] Combine getActiveLaneMask with vector_extract
... into a `whilelo` instruction with a pair of predicate registers.
1 parent f905935 commit 3373074

File tree

3 files changed

+455
-2
lines changed

3 files changed

+455
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,8 +1873,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18731873

18741874
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18751875
EVT OpVT) const {
1876-
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877-
if (!Subtarget->hasSVE())
1876+
// Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1877+
if (!Subtarget->hasSVEorSME())
18781878
return true;
18791879

18801880
// We can only support legal predicate result types. We can use the SVE
@@ -20507,6 +20507,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2050720507
return SDValue();
2050820508
}
2050920509

20510+
/// Try to fold a `whilelo` intrinsic node whose result is consumed only by two
/// EXTRACT_SUBVECTOR nodes (one taking the low half, one taking the high half)
/// into a single `aarch64_sve_whilelo_x2` intrinsic node that yields both
/// halves directly as its two results.
///
/// \param N         The INTRINSIC_WO_CHAIN node for `aarch64_sve_whilelo`;
///                  operand 1 is the start index, operand 2 the trip count.
/// \param DCI       Combiner state; supplies the DAG and performs the
///                  replacement of the two extract nodes.
/// \param Subtarget Used to check that SVE2p1 or SME2 is available.
/// \returns An empty SDValue when the pattern does not match; otherwise
///          SDValue(N, 0) after both extract users have been replaced via
///          DCI.CombineTo.
static SDValue tryCombineWhileLo(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  // The combine is only attempted when SVE2p1 or SME2 is available.
  if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
    return SDValue();

  // Exactly two users of the mask are required: the low and the high extract.
  if (!N->hasNUsesOfValue(2, 0))
    return SDValue();

  const EVT FullVT = N->getValueType(0);
  const uint64_t HalfSize = FullVT.getVectorMinNumElements() / 2;
  if (HalfSize < 2)
    return SDValue();

  auto It = N->use_begin();
  SDNode *Lo = *It++;
  SDNode *Hi = *It;

  uint64_t OffLo, OffHi;
  if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
      Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
    return SDValue();

  // The use list order is arbitrary; normalise so that Lo has the smaller
  // offset.
  if (OffLo > OffHi) {
    std::swap(Lo, Hi);
    std::swap(OffLo, OffHi);
  }

  if (OffLo != 0 || OffHi != HalfSize)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;

  // The offsets alone do not prove that each extract covers a full half: an
  // extract at offset 0 or HalfSize may still produce a narrower vector.
  // Only combine when both results are exactly half of the original mask,
  // since those types become the result types of the paired node below.
  const EVT HalfVT = FullVT.getHalfNumVectorElementsVT(*DAG.getContext());
  if (Lo->getValueType(0) != HalfVT || Hi->getValueType(0) != HalfVT)
    return SDValue();

  SDLoc DL(N);
  SDValue ID =
      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
  SDValue Idx = N->getOperand(1);
  SDValue TC = N->getOperand(2);
  // The paired intrinsic is built with i64 operands here; convert both the
  // index and the trip count when the original operands are not i64.
  if (Idx.getValueType() != MVT::i64) {
    Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
    TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
  }
  auto R =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
                  {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC});

  // Redirect each extract to the matching result of the paired node.
  DCI.CombineTo(Lo, R.getValue(0));
  DCI.CombineTo(Hi, R.getValue(1));

  return SDValue(N, 0);
}
20564+
2051020565
static SDValue performIntrinsicCombine(SDNode *N,
2051120566
TargetLowering::DAGCombinerInfo &DCI,
2051220567
const AArch64Subtarget *Subtarget) {
@@ -20837,6 +20892,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
2083720892
case Intrinsic::aarch64_sve_ptest_last:
2083820893
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
2083920894
AArch64CC::LAST_ACTIVE);
20895+
case Intrinsic::aarch64_sve_whilelo:
20896+
return tryCombineWhileLo(N, DCI, Subtarget);
2084020897
}
2084120898
return SDValue();
2084220899
}

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
34

45
; == Scalable ==
56

0 commit comments

Comments
 (0)