Skip to content

Vector masked extract last active element intrinsic #113587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20004,6 +20004,33 @@ the follow sequence of operations:

The ``mask`` operand will apply to at least the gather and scatter operations.

'``llvm.experimental.vector.extract.last.active``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This is an overloaded intrinsic.

::

declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %mask, i32 %passthru)
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)

Arguments:
""""""""""

The first argument is the data vector to extract a lane from. The second is a
mask vector controlling the extraction. The third argument is a passthru value
that is returned when no mask lane is active.

The two input vectors must have the same number of elements, and the type of
the passthru value must match that of the elements of the data vector.

Semantics:
""""""""""

The '``llvm.experimental.vector.extract.last.active``' intrinsic will extract an
element from the data vector at the index matching the highest active lane of
the mask vector. If no mask lanes are active then the passthru value is
returned instead.

.. _int_vector_compress:

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,12 @@ def int_experimental_vector_match : DefaultAttrsIntrinsic<
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], // Mask
[ IntrNoMem, IntrNoSync, IntrWillReturn ]>;

// Masked extract of the last active element: returns the element of the data
// vector (operand 0) at the highest active lane of the i1 mask (operand 1),
// or the scalar passthru (operand 2) when no mask lane is active. The result
// and the passthru both have the element type of the data vector, and the
// mask has the same number of elements as the data vector.
def int_experimental_vector_extract_last_active:
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMVectorElementType<0>], [IntrNoMem]>;

// Operators
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
// Integer arithmetic
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6415,6 +6415,50 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
DAG.setRoot(Histogram);
}

void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
unsigned Intrinsic) {
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
"Tried lowering invalid vector extract last");
SDLoc sdl = getCurSDLoc();
SDValue Data = getValue(I.getOperand(0));
SDValue Mask = getValue(I.getOperand(1));
SDValue PassThru = getValue(I.getOperand(2));

EVT DataVT = Data.getValueType();
EVT ScalarVT = PassThru.getValueType();
EVT BoolVT = Mask.getValueType().getScalarType();

// Find a suitable type for a stepvector.
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value.
if (DataVT.isScalableVector())
VScaleRange = getVScaleRange(I.getCaller(), 64);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned EltWidth = TLI.getBitWidthForCttzElements(
I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true,
&VScaleRange);
MVT StepVT = MVT::getIntegerVT(EltWidth);
EVT StepVecVT = DataVT.changeVectorElementType(StepVT);

// Zero out lanes with inactive elements, then find the highest remaining
// value from the stepvector.
SDValue Zeroes = DAG.getConstant(0, sdl, StepVecVT);
SDValue StepVec = DAG.getStepVector(sdl, StepVecVT);
SDValue ActiveElts = DAG.getSelect(sdl, StepVecVT, Mask, StepVec, Zeroes);
SDValue HighestIdx =
DAG.getNode(ISD::VECREDUCE_UMAX, sdl, StepVT, ActiveElts);

// Extract the corresponding lane from the data vector
EVT ExtVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue Idx = DAG.getZExtOrTrunc(HighestIdx, sdl, ExtVT);
SDValue Extract =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ScalarVT, Data, Idx);

// If all mask lanes were inactive, choose the passthru value instead.
SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask);
SDValue Result = DAG.getSelect(sdl, ScalarVT, AnyActive, Extract, PassThru);
setValue(&I, Result);
}

/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
Expand Down Expand Up @@ -8236,6 +8280,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
visitVectorHistogram(I, Intrinsic);
return;
}
case Intrinsic::experimental_vector_extract_last_active: {
visitVectorExtractLastActive(I, Intrinsic);
return;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ class SelectionDAGBuilder {
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID);
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
if (Name.consume_front("experimental.vector.")) {
Intrinsic::ID ID =
StringSwitch<Intrinsic::ID>(Name)
// Skip over extract.last.active, otherwise it will be 'upgraded'
// to a regular vector extract which is a different operation.
.StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
.StartsWith("extract.", Intrinsic::vector_extract)
.StartsWith("insert.", Intrinsic::vector_insert)
.StartsWith("splice.", Intrinsic::vector_splice)
Expand Down
Loading
Loading