Skip to content

Commit 5df59e4

Browse files
committed
[AArch64] Improve codegen for extract.last.active
When SVE support is present, the experimental.vector.extract.last.active intrinsic can be lowered to the 'clastb' instruction instead of going through the generic expansion.
1 parent dddeec4 commit 5df59e4

File tree

5 files changed

+100
-163
lines changed

5 files changed

+100
-163
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ class TargetLoweringBase {
490490
return true;
491491
}
492492

493+
virtual bool
494+
shouldExpandVectorExtractLastActive(const IntrinsicInst *I) const { // Hook: true => SelectionDAGBuilder emits its generic expansion; false => the call is passed on as a target intrinsic.
495+
return true; // Default: always take the generic expansion; a target overrides this to opt in to its own lowering.
496+
}
497+
493498
// Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
494499
// vecreduce(op(x, y)) for the reduction opcode RedOpc.
495500
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6417,6 +6417,13 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
64176417

64186418
void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
64196419
unsigned Intrinsic) {
6420+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6421+
6422+
if (!TLI.shouldExpandVectorExtractLastActive(cast<IntrinsicInst>(&I))) {
6423+
visitTargetIntrinsic(I, Intrinsic);
6424+
return;
6425+
}
6426+
64206427
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
64216428
"Tried lowering invalid vector extract last");
64226429
SDLoc sdl = getCurSDLoc();
@@ -6432,7 +6439,6 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
64326439
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value.
64336440
if (DataVT.isScalableVector())
64346441
VScaleRange = getVScaleRange(I.getCaller(), 64);
6435-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
64366442
unsigned EltWidth = TLI.getBitWidthForCttzElements(
64376443
I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true,
64386444
&VScaleRange);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,6 +2072,23 @@ bool AArch64TargetLowering::shouldExpandVectorMatch(EVT VT,
20722072
return true;
20732073
}
20742074

2075+
bool AArch64TargetLowering::shouldExpandVectorExtractLastActive(
2076+
const IntrinsicInst *I) const { // Returns true to request the generic expansion, false to keep the intrinsic for custom lowering.
2077+
// 'clastb' requires SVE support.
2078+
if (!Subtarget->hasSVE())
2079+
return true;
2080+
2081+
// Inspect the data vector (argument 0): its element type and size decide whether expansion can be skipped.
2082+
EVT VT = EVT::getEVT(I->getArgOperand(0)->getType());
2083+
EVT ScalarVT = VT.getScalarType();
2084+
2085+
if (ScalarVT != MVT::i8 && ScalarVT != MVT::i16 && ScalarVT != MVT::i32 && // Accepted element types: i8, i16, i32, i64, f32, f64.
2086+
ScalarVT != MVT::i64 && ScalarVT != MVT::f32 && ScalarVT != MVT::f64)
2087+
return true; // Any other element type falls back to the generic expansion.
2088+
2089+
return VT.getStoreSizeInBits().getKnownMinValue() != 128; // Skip expansion only when the known-minimum store size is exactly 128 bits.
2090+
}
2091+
20752092
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
20762093
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
20772094

@@ -6405,6 +6422,22 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
64056422
case Intrinsic::experimental_vector_match: {
64066423
return LowerVectorMatch(Op, DAG);
64076424
}
6425+
case Intrinsic::experimental_vector_extract_last_active: {
6426+
SDValue Data = Op.getOperand(1);
6427+
SDValue Mask = Op.getOperand(2);
6428+
SDValue PassThru = Op.getOperand(3);
6429+
EVT VT = Op.getValueType();
6430+
EVT DataVT = Data.getValueType();
6431+
6432+
if (DataVT.isFixedLengthVector()) {
6433+
EVT ContainerVT = getContainerForFixedLengthVector(DAG, DataVT);
6434+
EVT MaskVT = ContainerVT.changeElementType(MVT::i1);
6435+
Data = convertToScalableVector(DAG, ContainerVT, Data);
6436+
Mask = convertToScalableVector(DAG, MaskVT, Mask);
6437+
}
6438+
6439+
return DAG.getNode(AArch64ISD::CLASTB_N, dl, VT, Mask, PassThru, Data);
6440+
}
64086441
}
64096442
}
64106443

@@ -27192,6 +27225,18 @@ void AArch64TargetLowering::ReplaceNodeResults(
2719227225
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
2719327226
return;
2719427227
}
27228+
case Intrinsic::experimental_vector_extract_last_active: {
27229+
assert((VT == MVT::i8 || VT == MVT::i16) &&
27230+
"custom lowering for unexpected type");
27231+
SDLoc DL(N);
27232+
auto PassThru =
27233+
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(3));
27234+
auto Extract =
27235+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, N->getOperand(0),
27236+
N->getOperand(1), N->getOperand(2), PassThru);
27237+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
27238+
return;
27239+
}
2719527240
}
2719627241
}
2719727242
case ISD::READ_REGISTER: {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,9 @@ class AArch64TargetLowering : public TargetLowering {
987987

988988
bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;
989989

990+
bool
991+
shouldExpandVectorExtractLastActive(const IntrinsicInst *) const override; // Returns true when extract.last.active should take the generic expansion.
992+
990993
/// If a change in streaming mode is required on entry to/return from a
991994
/// function call it emits and returns the corresponding SMSTART or SMSTOP
992995
/// node. \p Condition should be one of the enum values from

0 commit comments

Comments
 (0)