Skip to content

Commit 1a75829

Browse files
[AArch64] Combine getActiveLaneMask with vector_extract into a `whilelo` instruction with a pair of predicate registers.
1 parent 9bebf25 commit 1a75829

File tree

3 files changed

+455
-2
lines changed

3 files changed

+455
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,8 +1830,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18301830

18311831
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18321832
EVT OpVT) const {
1833-
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1834-
if (!Subtarget->hasSVE())
1833+
// Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1834+
if (!Subtarget->hasSVEorSME())
18351835
return true;
18361836

18371837
// We can only support legal predicate result types. We can use the SVE
@@ -20522,6 +20522,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2052220522
return SDValue();
2052320523
}
2052420524

20525+
/// Try to fold a `whilelo` intrinsic whose result is consumed only by two
/// EXTRACT_SUBVECTOR nodes (the low half and the high half) into a single
/// `whilelo_x2` intrinsic that yields both halves as a pair of predicates.
///
/// \param N         The INTRINSIC_WO_CHAIN node for aarch64_sve_whilelo;
///                  operands 1 and 2 are used as the index and trip count.
/// \param DCI       Combiner state; used to reach the DAG and to replace the
///                  two extract nodes.
/// \param Subtarget Queried for SVE2p1 / SME2 availability.
/// \returns SDValue(N, 0) when the two extracts were replaced, otherwise an
///          empty SDValue to signal that nothing changed.
static SDValue tryCombineWhileLo(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  // The paired form is only emitted when SVE2p1 or SME2 is available.
  if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
    return SDValue();

  // Exactly two uses of the mask are required: one per half.
  if (!N->hasNUsesOfValue(2, 0))
    return SDValue();

  const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
  if (HalfSize < 2)
    return SDValue();

  auto It = N->use_begin();
  SDNode *Lo = *It++;
  SDNode *Hi = *It;

  // Both users must be subvector extracts at constant offsets.
  uint64_t OffLo, OffHi;
  if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
      Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
    return SDValue();

  // The use list order is arbitrary; normalise so Lo has the smaller offset.
  if (OffLo > OffHi) {
    std::swap(Lo, Hi);
    std::swap(OffLo, OffHi);
  }

  if (OffLo != 0 || OffHi != HalfSize)
    return SDValue();

  // The offsets alone do not prove that each extract covers exactly one half
  // of the mask: the extracted type could be narrower or fixed-length. Only
  // combine when both results are the same scalable type with HalfSize
  // (minimum) elements, otherwise the new node's result types would not match
  // what the paired intrinsic produces.
  const auto HalfVT = Lo->getValueType(0);
  if (HalfVT != Hi->getValueType(0) || !HalfVT.isScalableVector() ||
      HalfVT.getVectorMinNumElements() != HalfSize)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue ID =
      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
  SDValue Idx = N->getOperand(1);
  SDValue TC = N->getOperand(2);
  // The new node is built with i64 operands; widen/narrow both when needed.
  if (Idx.getValueType() != MVT::i64) {
    Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
    TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
  }
  auto R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, {HalfVT, HalfVT},
                       {ID, Idx, TC});

  // Redirect each extract to the matching result of the paired node.
  DCI.CombineTo(Lo, R.getValue(0));
  DCI.CombineTo(Hi, R.getValue(1));

  return SDValue(N, 0);
}
20579+
2052520580
static SDValue performIntrinsicCombine(SDNode *N,
2052620581
TargetLowering::DAGCombinerInfo &DCI,
2052720582
const AArch64Subtarget *Subtarget) {
@@ -20852,6 +20907,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
2085220907
case Intrinsic::aarch64_sve_ptest_last:
2085320908
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
2085420909
AArch64CC::LAST_ACTIVE);
20910+
case Intrinsic::aarch64_sve_whilelo:
20911+
return tryCombineWhileLo(N, DCI, Subtarget);
2085520912
}
2085620913
return SDValue();
2085720914
}

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
34

45
; == Scalable ==
56

0 commit comments

Comments
 (0)