Skip to content

Commit 9df5ff0

Browse files
committed
Remove SegSize and provide general lowering
1 parent 3e95323 commit 9df5ff0

File tree

11 files changed

+333
-111
lines changed

11 files changed

+333
-111
lines changed

llvm/docs/LangRef.rst

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19969,47 +19969,33 @@ dependent.
1996919969

1997019970
::
1997119971

19972-
declare <<n> x i1> @llvm.experimental.vector.match(<<n> x <ty>> %op1, <<n> x <ty>> %op2, <<n> x i1> %mask, i32 <segsize>)
19973-
declare <vscale x <n> x i1> @llvm.experimental.vector.match(<vscale x <n> x <ty>> %op1, <vscale x <n> x <ty>> %op2, <vscale x <n> x i1> %mask, i32 <segsize>)
19972+
declare <<n> x i1> @llvm.experimental.vector.match(<<n> x <ty>> %op1, <<m> x <ty>> %op2, <<n> x i1> %mask)
19973+
declare <vscale x <n> x i1> @llvm.experimental.vector.match(<vscale x <n> x <ty>> %op1, <<m> x <ty>> %op2, <vscale x <n> x i1> %mask)
1997419974

1997519975
Overview:
1997619976
"""""""""
1997719977

19978-
Find elements of the first argument matching any elements of the second.
19978+
Find active elements of the first argument matching any elements of the second.
1997919979

1998019980
Arguments:
1998119981
""""""""""
1998219982

19983-
The first argument is the search vector, the second argument is the vector of
19983+
The first argument is the search vector, the second argument the vector of
1998419984
elements we are searching for (i.e. for which we consider a match successful),
1998519985
and the third argument is a mask that controls which elements of the first
19986-
argument are active. The fourth argument is an immediate that sets the segment
19987-
size for the search window.
19986+
argument are active.
1998819987

1998919988
Semantics:
1999019989
""""""""""
1999119990

19992-
The '``llvm.experimental.vector.match``' intrinsic compares each element in the
19993-
first argument against potentially several elements of the second, placing
19991+
The '``llvm.experimental.vector.match``' intrinsic compares each active element
19992+
in the first argument against the elements of the second argument, placing
1999419993
``1`` in the corresponding element of the output vector if any comparison is
1999519994
successful, and ``0`` otherwise. Inactive elements in the mask are set to ``0``
19996-
in the output. The segment size controls the number of elements of the second
19997-
argument that are compared against.
19998-
19999-
For example, for vectors with 16 elements, if ``segsize = 16`` then each
20000-
element of the first argument is compared against all 16 elements of the second
20001-
argument; but if ``segsize = 4``, then each of the first four elements of the
20002-
first argument is compared against the first four elements of the second
20003-
argument, each of the second four elements of the first argument is compared
20004-
against the second four elements of the second argument, and so forth.
20005-
20006-
Currently, ``segsize`` needs to be an immediate value. The special value of
20007-
``-1`` is allowed to indicate all elements should be searched.
20008-
20009-
Support for specific vector types is target dependent. For AArch64 targets with
20010-
SVE2 support, the intrinsic is valid on ``<16 x i8>`` or ``<8 x i16>`` vectors,
20011-
or the scalable equivalents, with a ``segsize`` equal to the known minimum
20012-
number of elements of the vectors (16 or 8, respectively).
19995+
in the output.
19996+
19997+
The second argument needs to be a fixed-length vector with the same element
19998+
type as the first argument.
2001319999

2001420000
Matrix Intrinsics
2001520001
-----------------

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,9 +1744,10 @@ class TargetTransformInfo {
17441744
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
17451745
Align Alignment) const;
17461746

1747-
/// \returns Returns true if the target supports vector match operations for
1748-
/// the vector type `VT` using a segment size of `SegSize`.
1749-
bool hasVectorMatch(VectorType *VT, unsigned SegSize) const;
1747+
/// \returns True if the target has hardware support for vector match
1748+
/// operations between vectors of type `VT` and search vectors of `SearchSize`
1749+
/// elements, and false otherwise.
1750+
bool hasVectorMatch(VectorType *VT, unsigned SearchSize) const;
17501751

17511752
struct VPLegalization {
17521753
enum VPTransform {
@@ -2186,7 +2187,7 @@ class TargetTransformInfo::Concept {
21862187
virtual bool supportsScalableVectors() const = 0;
21872188
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
21882189
Align Alignment) const = 0;
2189-
virtual bool hasVectorMatch(VectorType *VT, unsigned SegSize) const = 0;
2190+
virtual bool hasVectorMatch(VectorType *VT, unsigned SearchSize) const = 0;
21902191
virtual VPLegalization
21912192
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
21922193
virtual bool hasArmWideBranch(bool Thumb) const = 0;
@@ -2957,8 +2958,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
29572958
return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
29582959
}
29592960

2960-
bool hasVectorMatch(VectorType *VT, unsigned SegSize) const override {
2961-
return Impl.hasVectorMatch(VT, SegSize);
2961+
bool hasVectorMatch(VectorType *VT, unsigned SearchSize) const override {
2962+
return Impl.hasVectorMatch(VT, SearchSize);
29622963
}
29632964

29642965
VPLegalization

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -972,7 +972,9 @@ class TargetTransformInfoImplBase {
972972
return false;
973973
}
974974

975-
bool hasVectorMatch(VectorType *VT, unsigned SegSize) const { return false; }
975+
bool hasVectorMatch(VectorType *VT, unsigned SearchSize) const {
976+
return false;
977+
}
976978

977979
TargetTransformInfo::VPLegalization
978980
getVPLegalizationStrategy(const VPIntrinsic &PI) const {

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1916,11 +1916,9 @@ def int_experimental_vector_histogram_add : DefaultAttrsIntrinsic<[],
19161916
def int_experimental_vector_match : DefaultAttrsIntrinsic<
19171917
[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
19181918
[ llvm_anyvector_ty,
1919-
LLVMMatchType<0>,
1920-
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Mask
1921-
llvm_i32_ty ], // Segment size
1922-
[ IntrNoMem, IntrNoSync, IntrWillReturn,
1923-
ImmArg<ArgIndex<3>> ]>;
1919+
llvm_anyvector_ty,
1920+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], // Mask
1921+
[ IntrNoMem, IntrNoSync, IntrWillReturn ]>;
19241922

19251923
// Operators
19261924
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,8 +1355,8 @@ bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
13551355
}
13561356

13571357
bool TargetTransformInfo::hasVectorMatch(VectorType *VT,
1358-
unsigned SegSize) const {
1359-
return TTIImpl->hasVectorMatch(VT, SegSize);
1358+
unsigned SearchSize) const {
1359+
return TTIImpl->hasVectorMatch(VT, SearchSize);
13601360
}
13611361

13621362
TargetTransformInfo::Concept::~Concept() = default;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8138,12 +8138,39 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
81388138
return;
81398139
}
81408140
case Intrinsic::experimental_vector_match: {
8141-
auto *VT = dyn_cast<VectorType>(I.getOperand(0)->getType());
8142-
auto SegmentSize = cast<ConstantInt>(I.getOperand(3))->getLimitedValue();
8141+
SDValue Op1 = getValue(I.getOperand(0));
8142+
SDValue Op2 = getValue(I.getOperand(1));
8143+
SDValue Mask = getValue(I.getOperand(2));
8144+
EVT Op1VT = Op1.getValueType();
8145+
EVT Op2VT = Op2.getValueType();
8146+
EVT ResVT = Mask.getValueType();
8147+
unsigned SearchSize = Op2VT.getVectorNumElements();
8148+
8149+
LLVMContext &Ctx = *DAG.getContext();
81438150
const auto &TTI =
81448151
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
8145-
assert(VT && TTI.hasVectorMatch(VT, SegmentSize) && "Unsupported type!");
8146-
visitTargetIntrinsic(I, Intrinsic);
8152+
8153+
// If the target has native support for this vector match operation, lower
8154+
// the intrinsic directly; otherwise, lower it below.
8155+
if (TTI.hasVectorMatch(cast<VectorType>(Op1VT.getTypeForEVT(Ctx)),
8156+
SearchSize)) {
8157+
visitTargetIntrinsic(I, Intrinsic);
8158+
return;
8159+
}
8160+
8161+
SDValue Ret = DAG.getNode(ISD::SPLAT_VECTOR, sdl, ResVT,
8162+
DAG.getConstant(0, sdl, MVT::i1));
8163+
8164+
for (unsigned i = 0; i < SearchSize; ++i) {
8165+
SDValue Op2Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
8166+
Op2VT.getVectorElementType(), Op2,
8167+
DAG.getVectorIdxConstant(i, sdl));
8168+
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, sdl, Op1VT, Op2Elem);
8169+
SDValue Cmp = DAG.getSetCC(sdl, ResVT, Op1, Splat, ISD::SETEQ);
8170+
Ret = DAG.getNode(ISD::OR, sdl, ResVT, Ret, Cmp);
8171+
}
8172+
8173+
setValue(&I, DAG.getNode(ISD::AND, sdl, ResVT, Ret, Mask));
81478174
return;
81488175
}
81498176
case Intrinsic::vector_reverse:

llvm/lib/IR/Verifier.cpp

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6112,28 +6112,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
61126112
Value *Op1 = Call.getArgOperand(0);
61136113
Value *Op2 = Call.getArgOperand(1);
61146114
Value *Mask = Call.getArgOperand(2);
6115-
Value *SegSize = Call.getArgOperand(3);
61166115

6117-
VectorType *OpTy = dyn_cast<VectorType>(Op1->getType());
6116+
VectorType *Op1Ty = dyn_cast<VectorType>(Op1->getType());
6117+
VectorType *Op2Ty = dyn_cast<VectorType>(Op2->getType());
61186118
VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
6119-
Check(OpTy && MaskTy, "experimental.vector.match operands are not vectors.",
6120-
&Call);
6121-
Check(Op2->getType() == OpTy,
6122-
"experimental.vector.match first two operands must have matching "
6123-
"types.",
6124-
&Call);
6125-
Check(isa<ConstantInt>(SegSize),
6126-
"experimental.vector.match segment size needs to be an immediate "
6127-
"integer.",
6128-
&Call);
61296119

6130-
ElementCount EC = OpTy->getElementCount();
6131-
Check(MaskTy->getElementCount() == EC,
6132-
"experimental.vector.match mask must have the same number of "
6133-
"elements as the remaining vector operands.",
6120+
Check(Op1Ty && Op2Ty && MaskTy, "Operands must be vectors.", &Call);
6121+
Check(!isa<ScalableVectorType>(Op2Ty), "Second operand cannot be scalable.",
6122+
&Call);
6123+
Check(Op1Ty->getElementType() == Op2Ty->getElementType(),
6124+
"First two operands must have the same element type.", &Call);
6125+
Check(Op1Ty->getElementCount() == MaskTy->getElementCount(),
6126+
"First operand and mask must have the same number of elements.",
61346127
&Call);
61356128
Check(MaskTy->getElementType()->isIntegerTy(1),
6136-
"experimental.vector.match mask element type is not i1.", &Call);
6129+
"Mask must be a vector of i1's.", &Call);
61376130
break;
61386131
}
61396132
case Intrinsic::vector_insert: {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6262,41 +6262,48 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
62626262
auto Op1 = Op.getOperand(1);
62636263
auto Op2 = Op.getOperand(2);
62646264
auto Mask = Op.getOperand(3);
6265-
auto SegmentSize =
6266-
cast<ConstantSDNode>(Op.getOperand(4))->getLimitedValue();
62676265

6268-
EVT VT = Op.getValueType();
6269-
auto MinNumElts = VT.getVectorMinNumElements();
6270-
6271-
assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch.");
6272-
assert(Op1.getValueSizeInBits().getKnownMinValue() == 128 &&
6273-
"Custom lower only works on 128-bit segments.");
6274-
assert((Op1.getValueType().getVectorElementType() == MVT::i8 ||
6275-
Op1.getValueType().getVectorElementType() == MVT::i16) &&
6276-
"Custom lower only supports 8-bit or 16-bit characters.");
6277-
assert(SegmentSize == MinNumElts && "Custom lower needs segment size to "
6278-
"match minimum number of elements.");
6279-
6280-
if (VT.isScalableVector())
6281-
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Mask, Op1, Op2);
6282-
6283-
// We can use the SVE2 match instruction to lower this intrinsic by
6284-
// converting the operands to scalable vectors, doing a match, and then
6285-
// extracting a fixed-width subvector from the scalable vector.
6266+
EVT Op1VT = Op1.getValueType();
6267+
EVT Op2VT = Op2.getValueType();
6268+
EVT ResVT = Op.getValueType();
62866269

6287-
EVT OpVT = Op1.getValueType();
6288-
EVT OpContainerVT = getContainerForFixedLengthVector(DAG, OpVT);
6270+
assert((Op1VT.getVectorElementType() == MVT::i8 ||
6271+
Op1VT.getVectorElementType() == MVT::i16) &&
6272+
"Expected 8-bit or 16-bit characters.");
6273+
assert(!Op2VT.isScalableVector() && "Search vector cannot be scalable.");
6274+
assert(Op1VT.getVectorElementType() == Op2VT.getVectorElementType() &&
6275+
"Operand type mismatch.");
6276+
assert(Op1VT.getVectorMinNumElements() == Op2VT.getVectorNumElements() &&
6277+
"Invalid operands.");
6278+
6279+
// Wrap the search vector in a scalable vector.
6280+
EVT OpContainerVT = getContainerForFixedLengthVector(DAG, Op2VT);
6281+
Op2 = convertToScalableVector(DAG, OpContainerVT, Op2);
6282+
6283+
// If the result is scalable, we need to broadcast the search vector across
6284+
// the SVE register and then carry out the MATCH.
6285+
if (ResVT.isScalableVector()) {
6286+
Op2 = DAG.getNode(AArch64ISD::DUPLANE128, dl, OpContainerVT, Op2,
6287+
DAG.getTargetConstant(0, dl, MVT::i64));
6288+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResVT, ID, Mask, Op1,
6289+
Op2);
6290+
}
6291+
6292+
// If the result is fixed, we can still use MATCH but we need to wrap the
6293+
// first operand and the mask in scalable vectors before doing so.
62896294
EVT MatchVT = OpContainerVT.changeElementType(MVT::i1);
62906295

6291-
auto ScalableOp1 = convertToScalableVector(DAG, OpContainerVT, Op1);
6292-
auto ScalableOp2 = convertToScalableVector(DAG, OpContainerVT, Op2);
6293-
auto ScalableMask = DAG.getNode(ISD::SIGN_EXTEND, dl, OpVT, Mask);
6294-
ScalableMask = convertFixedMaskToScalableVector(ScalableMask, DAG);
6296+
// Wrap the operands.
6297+
Op1 = convertToScalableVector(DAG, OpContainerVT, Op1);
6298+
Mask = DAG.getNode(ISD::ANY_EXTEND, dl, Op1VT, Mask);
6299+
Mask = convertFixedMaskToScalableVector(Mask, DAG);
62956300

6296-
SDValue Match = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MatchVT, ID,
6297-
ScalableMask, ScalableOp1, ScalableOp2);
6301+
// Carry out the match.
6302+
SDValue Match =
6303+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MatchVT, ID, Mask, Op1, Op2);
62986304

6299-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT,
6305+
// Extract and return the result.
6306+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op1VT,
63006307
DAG.getNode(ISD::SIGN_EXTEND, dl, OpContainerVT, Match),
63016308
DAG.getVectorIdxConstant(0, dl));
63026309
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4041,14 +4041,26 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
40414041
}
40424042
}
40434043

4044-
bool AArch64TTIImpl::hasVectorMatch(VectorType *VT, unsigned SegSize) const {
4045-
// Check that the target has SVE2 (and SVE is available), that `VT' is a
4046-
// legal type for MATCH, and that the segment size is 128-bit.
4044+
bool AArch64TTIImpl::hasVectorMatch(VectorType *VT, unsigned SearchSize) const {
4045+
// Check that (i) the target has SVE2 and SVE is available, (ii) `VT' is a
4046+
// legal type for MATCH, and (iii) the search vector can be broadcast
4047+
// efficiently to a legal type.
4048+
//
4049+
// Currently, we require the length of the search vector to match the minimum
4050+
// number of elements of `VT'. In practice this means we only support the
4051+
// cases (nxv16i8, 16), (v16i8, 16), (nxv8i16, 8), and (v8i16, 8), where the
4052+
// first element of the tuples corresponds to the type of the first argument
4053+
// and the second the length of the search vector.
4054+
//
4055+
// In the future we can support more cases. For example, (nxv16i8, 4) could
4056+
// be efficiently supported by using a DUP.S to broadcast the search
4057+
// elements, and more exotic cases like (nxv16i8, 5) could be supported by a
4058+
// sequence of SEL(DUP).
40474059
if (ST->hasSVE2() && ST->isSVEAvailable() &&
40484060
VT->getPrimitiveSizeInBits().getKnownMinValue() == 128 &&
4049-
VT->getElementCount().getKnownMinValue() == SegSize &&
40504061
(VT->getElementCount().getKnownMinValue() == 8 ||
4051-
VT->getElementCount().getKnownMinValue() == 16))
4062+
VT->getElementCount().getKnownMinValue() == 16) &&
4063+
VT->getElementCount().getKnownMinValue() == SearchSize)
40524064
return true;
40534065
return false;
40544066
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
392392
return ST->hasSVE();
393393
}
394394

395-
bool hasVectorMatch(VectorType *VT, unsigned SegSize) const;
395+
bool hasVectorMatch(VectorType *VT, unsigned SearchSize) const;
396396

397397
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
398398
std::optional<FastMathFlags> FMF,

0 commit comments

Comments
 (0)