Skip to content

Commit ece82a1

Browse files
author
git apple-llvm automerger
committed
Merge commit 'd224f08dacb3' from apple/stable/20200714 into swift/main
2 parents 2d5cd94 + d224f08 commit ece82a1

File tree

4 files changed

+363
-19
lines changed

4 files changed

+363
-19
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8607,14 +8607,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
86078607
bool isConstant = true;
86088608
bool AllLanesExtractElt = true;
86098609
unsigned NumConstantLanes = 0;
8610+
unsigned NumDifferentLanes = 0;
8611+
unsigned NumUndefLanes = 0;
86108612
SDValue Value;
86118613
SDValue ConstantValue;
86128614
for (unsigned i = 0; i < NumElts; ++i) {
86138615
SDValue V = Op.getOperand(i);
86148616
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
86158617
AllLanesExtractElt = false;
8616-
if (V.isUndef())
8618+
if (V.isUndef()) {
8619+
++NumUndefLanes;
86178620
continue;
8621+
}
86188622
if (i > 0)
86198623
isOnlyLowElement = false;
86208624
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
@@ -8630,8 +8634,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
86308634

86318635
if (!Value.getNode())
86328636
Value = V;
8633-
else if (V != Value)
8637+
else if (V != Value) {
86348638
usesOnlyOneValue = false;
8639+
++NumDifferentLanes;
8640+
}
86358641
}
86368642

86378643
if (!Value.getNode()) {
@@ -8757,11 +8763,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
87578763
}
87588764
}
87598765

8766+
// If we need to insert a small number of different non-constant elements and
8767+
// the vector width is sufficiently large, prefer using DUP with the common
8768+
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
8769+
// skip the constant lane handling below.
8770+
bool PreferDUPAndInsert =
8771+
!isConstant && NumDifferentLanes >= 1 &&
8772+
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
8773+
NumDifferentLanes >= NumConstantLanes;
8774+
87608775
// If there was only one constant value used and for more than one lane,
87618776
// start by splatting that value, then replace the non-constant lanes. This
87628777
// is better than the default, which will perform a separate initialization
87638778
// for each lane.
8764-
if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
8779+
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
87658780
// Firstly, try to materialize the splat constant.
87668781
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
87678782
Val = ConstantBuildVector(Vec, DAG);
@@ -8797,6 +8812,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
87978812
return shuffle;
87988813
}
87998814

8815+
if (PreferDUPAndInsert) {
8816+
// First, build a constant vector with the common element.
8817+
SmallVector<SDValue, 8> Ops;
8818+
for (unsigned I = 0; I < NumElts; ++I)
8819+
Ops.push_back(Value);
8820+
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
8821+
// Next, insert the elements that do not match the common value.
8822+
for (unsigned I = 0; I < NumElts; ++I)
8823+
if (Op.getOperand(I) != Value)
8824+
NewVector =
8825+
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
8826+
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
8827+
8828+
return NewVector;
8829+
}
8830+
88008831
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
88018832
// know the default expansion would otherwise fall back on something even
88028833
// worse. For a vector with one or two non-undef values, that's

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5251,6 +5251,16 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
52515251
(i64 0)),
52525252
dsub)>;
52535253

5254+
// Select a vector_insert of the f16 `fpimm0` immediate into lane $imm of a
// v8f16 vector as INSvi16gpr, using WZR as the general-purpose source register.
def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
5255+
(i64 VectorIndexH:$imm)),
5256+
(INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
5257+
// Select a vector_insert of the f32 `fpimm0` immediate into lane $imm of a
// v4f32 vector as INSvi32gpr, using WZR as the general-purpose source register.
def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
5258+
(i64 VectorIndexS:$imm)),
5259+
(INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
5260+
// Select a vector_insert of the f64 `fpimm0` immediate into lane $imm of a
// v2f64 vector as INSvi64gpr, using XZR as the general-purpose source register.
// The lane index is a 64-bit-element index, so the result pattern names it with
// VectorIndexD, the same operand class the source pattern binds it with.
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
5261+
(i64 VectorIndexD:$imm)),
5262+
(INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
5263+
52545264
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
52555265
(f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
52565266
(INSvi16lane

0 commit comments

Comments
 (0)