Skip to content

Commit 90ef28b

Browse files
[AArch64] Combine getActiveLaneMask with vector_extract
... into a `whilelo` instruction with a pair of predicate registers.
1 parent 195d8ac commit 90ef28b

File tree

3 files changed

+455
-2
lines changed

3 files changed

+455
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,8 +1834,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18341834

18351835
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18361836
EVT OpVT) const {
1837-
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838-
if (!Subtarget->hasSVE())
1837+
// Only SVE/SME has a 1:1 mapping from intrinsic -> instruction (whilelo).
1838+
if (!Subtarget->hasSVEorSME())
18391839
return true;
18401840

18411841
// We can only support legal predicate result types. We can use the SVE
@@ -20535,6 +20535,61 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
2053520535
return SDValue();
2053620536
}
2053720537

20538+
/// Try to fold a `whilelo` intrinsic whose result is consumed only by two
/// EXTRACT_SUBVECTOR nodes -- one taking the low half and one taking the high
/// half of the predicate -- into a single `aarch64_sve_whilelo_x2` intrinsic
/// that produces both halves directly.
///
/// \param N         The INTRINSIC_WO_CHAIN node for `aarch64_sve_whilelo`;
///                  operand 1 is the start index, operand 2 the trip count.
/// \param DCI       Combiner state; used to access the DAG and to replace the
///                  two extract nodes.
/// \param Subtarget Queried for SVE2p1 / SME2 availability.
/// \returns An empty SDValue when the pattern does not match. Otherwise both
///          extracts are replaced via DCI.CombineTo and SDValue(N, 0) is
///          returned.
static SDValue tryCombineWhileLo(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  // The predicate-pair form is only emitted when SVE2p1 or SME2 is available.
  if (!Subtarget->hasSVE2p1() && !Subtarget->hasSME2())
    return SDValue();

  // The whilelo result must feed exactly two users (the two half extracts).
  if (!N->hasNUsesOfValue(2, 0))
    return SDValue();

  const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2;
  if (HalfSize < 2)
    return SDValue();

  auto It = N->use_begin();
  SDNode *Lo = *It++;
  SDNode *Hi = *It;

  uint64_t OffLo, OffHi;
  if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Lo->getOperand(1).getNode(), OffLo) ||
      Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      !isIntImmediate(Hi->getOperand(1).getNode(), OffHi))
    return SDValue();

  // The use list order is arbitrary; normalise so that Lo has the smaller
  // offset.
  if (OffLo > OffHi) {
    std::swap(Lo, Hi);
    std::swap(OffLo, OffHi);
  }

  if (OffLo != 0 || OffHi != HalfSize)
    return SDValue();

  // Matching offsets alone is not sufficient: each extract must produce
  // exactly one scalable half of the original predicate. Otherwise (e.g. two
  // narrower or fixed-length extracts that happen to start at 0 and HalfSize)
  // the results of whilelo_x2 would not have the semantics of the extracts
  // they replace.
  const EVT HalfVT = Lo->getValueType(0);
  if (HalfVT != Hi->getValueType(0) || !HalfVT.isScalableVector() ||
      HalfVT.getVectorMinNumElements() != HalfSize)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue ID =
      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
  SDValue Idx = N->getOperand(1);
  SDValue TC = N->getOperand(2);
  // The x2 intrinsic is built with i64 operands here; bring both the index and
  // the trip count to i64 when the original operands use another type.
  if (Idx.getValueType() != MVT::i64) {
    Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);
    TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64);
  }
  auto R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, {HalfVT, HalfVT},
                       {ID, Idx, TC});

  // Result 0 replaces the low-half extract, result 1 the high-half extract.
  DCI.CombineTo(Lo, R.getValue(0));
  DCI.CombineTo(Hi, R.getValue(1));

  return SDValue(N, 0);
}
20592+
2053820593
static SDValue performIntrinsicCombine(SDNode *N,
2053920594
TargetLowering::DAGCombinerInfo &DCI,
2054020595
const AArch64Subtarget *Subtarget) {
@@ -20832,6 +20887,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
2083220887
case Intrinsic::aarch64_sve_ptest_last:
2083320888
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
2083420889
AArch64CC::LAST_ACTIVE);
20890+
case Intrinsic::aarch64_sve_whilelo:
20891+
return tryCombineWhileLo(N, DCI, Subtarget);
2083520892
}
2083620893
return SDValue();
2083720894
}

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
34

45
; == Scalable ==
56

0 commit comments

Comments
 (0)