Skip to content

Commit 5f7502b

Browse files
authored
[AArch64][SVE] Support lowering fixed-length BUILD_VECTORS to ZIPs (#111698)
This allows lowering fixed-length (non-constant) BUILD_VECTORS (<= 128-bit) to a chain of ZIP1 instructions when Neon is not available, rather than using the default lowering, which is to spill to the stack and reload.

For example,
```
t5: v4f32 = BUILD_VECTOR(t0, t1, t2, t3)
```
Becomes:
```
zip1 z0.s, z0.s, z1.s // z0 = t0,t1,...
zip1 z2.s, z2.s, z3.s // z2 = t2,t3,...
zip1 z0.d, z0.d, z2.d // z0 = t0,t1,t2,t3,...
```
When values are already in FPRs, this generally seems to lead to a more compact output with less movement to/from the stack.
1 parent 3ec1b1a commit 5f7502b

19 files changed

+1336
-1545
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/ADT/STLExtras.h"
2626
#include "llvm/ADT/SmallSet.h"
2727
#include "llvm/ADT/SmallVector.h"
28+
#include "llvm/ADT/SmallVectorExtras.h"
2829
#include "llvm/ADT/Statistic.h"
2930
#include "llvm/ADT/StringRef.h"
3031
#include "llvm/ADT/Twine.h"
@@ -2111,7 +2112,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
21112112
setOperationAction(ISD::BITCAST, VT, PreferNEON ? Legal : Default);
21122113
setOperationAction(ISD::BITREVERSE, VT, Default);
21132114
setOperationAction(ISD::BSWAP, VT, Default);
2114-
setOperationAction(ISD::BUILD_VECTOR, VT, Default);
2115+
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
21152116
setOperationAction(ISD::CONCAT_VECTORS, VT, Default);
21162117
setOperationAction(ISD::CTLZ, VT, Default);
21172118
setOperationAction(ISD::CTPOP, VT, Default);
@@ -14395,24 +14396,72 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
1439514396
return SDValue();
1439614397
}
1439714398

14398-
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
14399-
SelectionDAG &DAG) const {
14399+
// Lower a fixed-length BUILD_VECTOR using SVE container types.
//
// Two cases are handled:
//  1. Constant sequences: built as (Start + step-vector) in the container
//     type.
//  2. Non-constant, power-of-two vectors of at most 128 bits: each operand is
//     inserted into lane 0 of its own container vector and the vectors are
//     then combined pairwise with ZIP1 until a single vector remains.
//
// \param Op  The node to lower; it is cast to BuildVectorSDNode.
// \param DAG The SelectionDAG used to create the replacement nodes.
// \returns The lowered value converted back to Op's value type, or an empty
//          SDValue when neither case applies (presumably leaving the node to
//          the common legalisation; confirm against the caller).
SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
14400+
SDValue Op, SelectionDAG &DAG) const {
1440014401
EVT VT = Op.getValueType();
14402+
SDLoc DL(Op);
14403+
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
14404+
auto *BVN = cast<BuildVectorSDNode>(Op);
1440114405

14402-
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
14403-
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
14404-
SDLoc DL(Op);
14405-
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
14406-
SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
14407-
SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
14408-
SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
14409-
return convertFromScalableVector(DAG, Op.getValueType(), Seq);
14410-
}
14406+
// Case 1: a constant sequence is materialised as Start + Steps, where Start
// is a constant of SeqInfo->first and Steps is a step vector with stride
// SeqInfo->second, both in the container type.
if (auto SeqInfo = BVN->isConstantSequence()) {
14407+
SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
14408+
SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
14409+
SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
14410+
return convertFromScalableVector(DAG, VT, Seq);
14411+
}
14412+
14413+
unsigned NumElems = VT.getVectorNumElements();
14414+
// Case 2 only applies to non-constant, power-of-two vector types of at most
// 128 bits with more than one element; decline everything else.
if (!VT.isPow2VectorType() || VT.getFixedSizeInBits() > 128 ||
14415+
NumElems <= 1 || BVN->isConstant())
14416+
return SDValue();
14417+
14418+
// Predicate: is this operand the result of an EXTRACT_VECTOR_ELT node?
auto IsExtractElt = [](SDValue Op) {
14419+
return Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT;
14420+
};
1441114421

14412-
// Revert to common legalisation for all other variants.
14422+
// For integer types that are not already in vectors limit to at most four
14423+
// elements. This is an arbitrary restriction to avoid many fmovs from GPRs.
14424+
// Operands that are EXTRACT_VECTOR_ELT nodes are subtracted from the element
// count before comparing against the limit.
if (VT.getScalarType().isInteger() &&
14425+
NumElems - count_if(Op->op_values(), IsExtractElt) > 4)
1441314426
return SDValue();
14427+
14428+
// Lower (pow2) BUILD_VECTORS that are <= 128-bit to a sequence of ZIP1s.
14429+
SDValue ZeroI64 = DAG.getConstant(0, DL, MVT::i64);
14430+
// One intermediate per operand: the operand inserted at index 0 of an undef
// container vector. Undef operands map directly to the undef vector, so no
// insert node is created for them.
SmallVector<SDValue, 16> Intermediates = map_to_vector<16>(
14431+
Op->op_values(), [&, Undef = DAG.getUNDEF(ContainerVT)](SDValue Op) {
14432+
return Op.isUndef() ? Undef
14433+
: DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
14434+
ContainerVT, Undef, Op, ZeroI64);
14435+
});
14436+
14437+
ElementCount ZipEC = ContainerVT.getVectorElementCount();
14438+
// Each round zips adjacent pairs of intermediates, halving their number, and
// then halves the element count used to pick the next round's zip type.
while (Intermediates.size() > 1) {
14439+
EVT ZipVT = getPackedSVEVectorVT(ZipEC);
14440+
14441+
// Reading I + 1 relies on an even number of intermediates; this assumes the
// operand count equals NumElems (a power of two greater than one, per the
// checks above).
for (unsigned I = 0; I < Intermediates.size(); I += 2) {
14442+
SDValue Op0 = DAG.getBitcast(ZipVT, Intermediates[I + 0]);
14443+
SDValue Op1 = DAG.getBitcast(ZipVT, Intermediates[I + 1]);
14444+
// Results are written into the front half of the same vector. When the
// second operand is undef the ZIP1 is skipped and Op0 is used as-is.
Intermediates[I / 2] =
14445+
Op1.isUndef() ? Op0
14446+
: DAG.getNode(AArch64ISD::ZIP1, DL, ZipVT, Op0, Op1);
14447+
}
14448+
14449+
Intermediates.resize(Intermediates.size() / 2);
14450+
ZipEC = ZipEC.divideCoefficientBy(2);
1441414451
}
1441514452

14453+
assert(Intermediates.size() == 1);
14454+
// Cast the final zip result back to the container type before converting to
// the fixed-length result type.
SDValue Vec = DAG.getBitcast(ContainerVT, Intermediates[0]);
14455+
return convertFromScalableVector(DAG, VT, Vec);
14456+
}
14457+
14458+
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
14459+
SelectionDAG &DAG) const {
14460+
EVT VT = Op.getValueType();
14461+
14462+
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14463+
return LowerFixedLengthBuildVectorToSVE(Op, DAG);
14464+
1441614465
// Try to build a simple constant vector.
1441714466
Op = NormalizeBuildVector(Op, DAG);
1441814467
// Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS), if so,

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,7 @@ class AArch64TargetLowering : public TargetLowering {
12441244
SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
12451245
SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
12461246
SelectionDAG &DAG) const;
1247+
SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const;
12471248

12481249
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
12491250
SmallVectorImpl<SDNode *> &Created) const override;

0 commit comments

Comments
 (0)