@@ -8607,14 +8607,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
8607
8607
bool isConstant = true;
8608
8608
bool AllLanesExtractElt = true;
8609
8609
unsigned NumConstantLanes = 0;
8610
+ unsigned NumDifferentLanes = 0;
8611
+ unsigned NumUndefLanes = 0;
8610
8612
SDValue Value;
8611
8613
SDValue ConstantValue;
8612
8614
for (unsigned i = 0; i < NumElts; ++i) {
8613
8615
SDValue V = Op.getOperand(i);
8614
8616
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
8615
8617
AllLanesExtractElt = false;
8616
- if (V.isUndef())
8618
+ if (V.isUndef()) {
8619
+ ++NumUndefLanes;
8617
8620
continue;
8621
+ }
8618
8622
if (i > 0)
8619
8623
isOnlyLowElement = false;
8620
8624
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
@@ -8630,8 +8634,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
8630
8634
8631
8635
if (!Value.getNode())
8632
8636
Value = V;
8633
- else if (V != Value)
8637
+ else if (V != Value) {
8634
8638
usesOnlyOneValue = false;
8639
+ ++NumDifferentLanes;
8640
+ }
8635
8641
}
8636
8642
8637
8643
if (!Value.getNode()) {
@@ -8757,11 +8763,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
8757
8763
}
8758
8764
}
8759
8765
8766
+ // If we need to insert a small number of different non-constant elements and
8767
+ // the vector width is sufficiently large, prefer using DUP with the common
8768
+ // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
8769
+ // skip the constant lane handling below.
8770
+ bool PreferDUPAndInsert =
8771
+ !isConstant && NumDifferentLanes >= 1 &&
8772
+ NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
8773
+ NumDifferentLanes >= NumConstantLanes;
8774
+
8760
8775
// If there was only one constant value used and for more than one lane,
8761
8776
// start by splatting that value, then replace the non-constant lanes. This
8762
8777
// is better than the default, which will perform a separate initialization
8763
8778
// for each lane.
8764
- if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
8779
+ if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
8765
8780
// Firstly, try to materialize the splat constant.
8766
8781
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
8767
8782
Val = ConstantBuildVector(Vec, DAG);
@@ -8797,6 +8812,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
8797
8812
return shuffle;
8798
8813
}
8799
8814
8815
+ if (PreferDUPAndInsert) {
8816
+ // First, build a splat vector with the common element.
8817
+ SmallVector<SDValue, 8> Ops;
8818
+ for (unsigned I = 0; I < NumElts; ++I)
8819
+ Ops.push_back(Value);
8820
+ SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
8821
+ // Next, insert the elements that do not match the common value.
8822
+ for (unsigned I = 0; I < NumElts; ++I)
8823
+ if (Op.getOperand(I) != Value)
8824
+ NewVector =
8825
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
8826
+ Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
8827
+
8828
+ return NewVector;
8829
+ }
8830
+
8800
8831
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8801
8832
// know the default expansion would otherwise fall back on something even
8802
8833
// worse. For a vector with one or two non-undef values, that's
0 commit comments