|
25 | 25 | #include "llvm/ADT/STLExtras.h"
|
26 | 26 | #include "llvm/ADT/SmallSet.h"
|
27 | 27 | #include "llvm/ADT/SmallVector.h"
|
| 28 | +#include "llvm/ADT/SmallVectorExtras.h" |
28 | 29 | #include "llvm/ADT/Statistic.h"
|
29 | 30 | #include "llvm/ADT/StringRef.h"
|
30 | 31 | #include "llvm/ADT/Twine.h"
|
@@ -2111,7 +2112,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
2111 | 2112 | setOperationAction(ISD::BITCAST, VT, PreferNEON ? Legal : Default);
|
2112 | 2113 | setOperationAction(ISD::BITREVERSE, VT, Default);
|
2113 | 2114 | setOperationAction(ISD::BSWAP, VT, Default);
|
2114 |
| - setOperationAction(ISD::BUILD_VECTOR, VT, Default); |
| 2115 | + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
2115 | 2116 | setOperationAction(ISD::CONCAT_VECTORS, VT, Default);
|
2116 | 2117 | setOperationAction(ISD::CTLZ, VT, Default);
|
2117 | 2118 | setOperationAction(ISD::CTPOP, VT, Default);
|
@@ -14395,24 +14396,72 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
|
14395 | 14396 | return SDValue();
|
14396 | 14397 | }
|
14397 | 14398 |
|
14398 |
| -SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, |
14399 |
| - SelectionDAG &DAG) const { |
| 14399 | +SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE( |
| 14400 | + SDValue Op, SelectionDAG &DAG) const { |
14400 | 14401 | EVT VT = Op.getValueType();
|
| 14402 | + SDLoc DL(Op); |
| 14403 | + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); |
| 14404 | + auto *BVN = cast<BuildVectorSDNode>(Op); |
14401 | 14405 |
|
14402 |
| - if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) { |
14403 |
| - if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) { |
14404 |
| - SDLoc DL(Op); |
14405 |
| - EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); |
14406 |
| - SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT); |
14407 |
| - SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second); |
14408 |
| - SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps); |
14409 |
| - return convertFromScalableVector(DAG, Op.getValueType(), Seq); |
14410 |
| - } |
| 14406 | + if (auto SeqInfo = BVN->isConstantSequence()) { |
| 14407 | + SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT); |
| 14408 | + SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second); |
| 14409 | + SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps); |
| 14410 | + return convertFromScalableVector(DAG, VT, Seq); |
| 14411 | + } |
| 14412 | + |
| 14413 | + unsigned NumElems = VT.getVectorNumElements(); |
| 14414 | + if (!VT.isPow2VectorType() || VT.getFixedSizeInBits() > 128 || |
| 14415 | + NumElems <= 1 || BVN->isConstant()) |
| 14416 | + return SDValue(); |
| 14417 | + |
| 14418 | + auto IsExtractElt = [](SDValue Op) { |
| 14419 | + return Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT; |
| 14420 | + }; |
14411 | 14421 |
|
14412 |
| - // Revert to common legalisation for all other variants. |
| 14422 | + // For integer types that are not already in vectors, limit to at most four |
| 14423 | + // elements. This is an arbitrary restriction to avoid many fmovs from GPRs. |
| 14424 | + if (VT.getScalarType().isInteger() && |
| 14425 | + NumElems - count_if(Op->op_values(), IsExtractElt) > 4) |
14413 | 14426 | return SDValue();
|
| 14427 | + |
| 14428 | + // Lower (pow2) BUILD_VECTORS that are <= 128-bit to a sequence of ZIP1s. |
| 14429 | + SDValue ZeroI64 = DAG.getConstant(0, DL, MVT::i64); |
| 14430 | + SmallVector<SDValue, 16> Intermediates = map_to_vector<16>( |
| 14431 | + Op->op_values(), [&, Undef = DAG.getUNDEF(ContainerVT)](SDValue Op) { |
| 14432 | + return Op.isUndef() ? Undef |
| 14433 | + : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, |
| 14434 | + ContainerVT, Undef, Op, ZeroI64); |
| 14435 | + }); |
| 14436 | + |
| 14437 | + ElementCount ZipEC = ContainerVT.getVectorElementCount(); |
| 14438 | + while (Intermediates.size() > 1) { |
| 14439 | + EVT ZipVT = getPackedSVEVectorVT(ZipEC); |
| 14440 | + |
| 14441 | + for (unsigned I = 0; I < Intermediates.size(); I += 2) { |
| 14442 | + SDValue Op0 = DAG.getBitcast(ZipVT, Intermediates[I + 0]); |
| 14443 | + SDValue Op1 = DAG.getBitcast(ZipVT, Intermediates[I + 1]); |
| 14444 | + Intermediates[I / 2] = |
| 14445 | + Op1.isUndef() ? Op0 |
| 14446 | + : DAG.getNode(AArch64ISD::ZIP1, DL, ZipVT, Op0, Op1); |
| 14447 | + } |
| 14448 | + |
| 14449 | + Intermediates.resize(Intermediates.size() / 2); |
| 14450 | + ZipEC = ZipEC.divideCoefficientBy(2); |
14414 | 14451 | }
|
14415 | 14452 |
|
| 14453 | + assert(Intermediates.size() == 1); |
| 14454 | + SDValue Vec = DAG.getBitcast(ContainerVT, Intermediates[0]); |
| 14455 | + return convertFromScalableVector(DAG, VT, Vec); |
| 14456 | +} |
| 14457 | + |
| 14458 | +SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, |
| 14459 | + SelectionDAG &DAG) const { |
| 14460 | + EVT VT = Op.getValueType(); |
| 14461 | + |
| 14462 | + if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) |
| 14463 | + return LowerFixedLengthBuildVectorToSVE(Op, DAG); |
| 14464 | + |
14416 | 14465 | // Try to build a simple constant vector.
|
14417 | 14466 | Op = NormalizeBuildVector(Op, DAG);
|
14418 | 14467 | // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS), if so,
|
|
0 commit comments