@@ -8690,6 +8690,33 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
8690
8690
return LowerShift(Res, Subtarget, DAG);
8691
8691
}
8692
8692
8693
+ /// Attempt to lower a BUILD_VECTOR of scalar values to a shuffle of splats
8694
+ /// representing a blend. Only a v4f64 node of the form X,Y,Y,X (X != Y) on an AVX subtarget is matched; otherwise an empty SDValue is returned.
8695
+ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
8696
+ X86Subtarget const &Subtarget,
8697
+ SelectionDAG &DAG) {
8698
+ if (!Subtarget.hasAVX()) // No lowering is attempted without AVX.
8699
+ return {};
8700
+ // Simple value type of the node's result #0; it decides whether the pattern below applies.
8701
+ auto VT = BVOp->getSimpleValueType(0u);
8702
+ // Only a four-operand v4f64 BUILD_VECTOR is considered.
8703
+ if (VT == MVT::v4f64 && BVOp->getNumOperands() == 4u) {
8704
+ SDValue Op0 = BVOp->getOperand(0u);
8705
+ SDValue Op1 = BVOp->getOperand(1u);
8706
+ SDValue Op2 = BVOp->getOperand(2u);
8707
+ SDValue Op3 = BVOp->getOperand(3u);
8708
+ // The Op0 != Op1 test below rejects the case where all four operands are the same value.
8709
+ // Match X,Y,Y,X inputs.
8710
+ if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
8711
+ auto NewOp0 = DAG.getSplatBuildVector(VT, DL, Op0); // X,X,X,X
8712
+ auto NewOp1 = DAG.getSplatBuildVector(VT, DL, Op1); // Y,Y,Y,Y
8713
+ return DAG.getVectorShuffle(VT, DL, NewOp0, NewOp1, {0, 5, 6, 3}); // Lanes 0 and 3 come from NewOp0; mask values 5 and 6 pick lanes 1 and 2 of NewOp1, giving X,Y,Y,X.
8714
+ }
8715
+ }
8716
+ // Nothing matched: the empty SDValue tells the caller that no lowering was produced.
8717
+ return {};
8718
+ }
8719
+
8693
8720
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
8694
8721
/// functionality to do this, so it's all zeros, all ones, or some derivation
8695
8722
/// that is cheap to calculate.
@@ -9040,39 +9067,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9040
9067
MVT OpEltVT = Op.getOperand(0).getSimpleValueType();
9041
9068
unsigned NumElems = Op.getNumOperands();
9042
9069
9043
- // Match BUILD_VECTOR of scalars that we can lower to X86ISD::BLENDI via
9044
- // shuffles.
9045
- //
9046
- // v4f64 = BUILD_VECTOR X,Y,Y,X
9047
- // >>>
9048
- // t1: v4f64 = BUILD_VECTOR X,u,u,u
9049
- // t3: v4f64 = vector_shuffle<0,u,u,0> t1, u
9050
- // t2: v4f64 = BUILD_VECTOR Y,u,u,u
9051
- // t4: v4f64 = vector_shuffle<u,0,0,u> t2, u
9052
- // v4f64 = vector_shuffle<0,5,6,3> t3, t4
9053
- //
9054
- if (Subtarget.hasAVX() && VT == MVT::v4f64 && Op->getNumOperands() == 4u) {
9055
- auto Op0 = Op->getOperand(0u);
9056
- auto Op1 = Op->getOperand(1u);
9057
- auto Op2 = Op->getOperand(2u);
9058
- auto Op3 = Op->getOperand(3u);
9059
-
9060
- // Match X,Y,Y,X inputs.
9061
- if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
9062
- auto PsnVal = DAG.getUNDEF(MVT::f64);
9063
-
9064
- auto NewOp0 = DAG.getBuildVector(VT, dl, {Op0, PsnVal, PsnVal, PsnVal});
9065
- NewOp0 = DAG.getVectorShuffle(VT, dl, NewOp0, DAG.getUNDEF(VT),
9066
- {0, -1, -1, 0});
9067
-
9068
- auto NewOp1 = DAG.getBuildVector(VT, dl, {Op1, PsnVal, PsnVal, PsnVal});
9069
- NewOp1 = DAG.getVectorShuffle(VT, dl, NewOp1, DAG.getUNDEF(VT),
9070
- {-1, 0, 0, -1});
9071
-
9072
- return DAG.getVectorShuffle(VT, dl, NewOp0, NewOp1, {0, 5, 6, 3});
9073
- }
9074
- }
9075
-
9076
9070
// Generate vectors for predicate vectors.
9077
9071
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
9078
9072
return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);
@@ -9185,6 +9179,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9185
9179
return Broadcast;
9186
9180
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
9187
9181
return BitOp;
9182
+ if (SDValue Blend = lowerBuildVectorAsBlend(BV, dl, Subtarget, DAG))
9183
+ return Blend;
9188
9184
9189
9185
unsigned NumZero = ZeroMask.popcount();
9190
9186
unsigned NumNonZero = NonZeroMask.popcount();
0 commit comments