@@ -9926,6 +9926,42 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
9926
9926
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
9927
9927
}
9928
9928
9929
+ // Canonicalize SHUFPD mask to improve chances of further folding.
9930
+ // Mask elements are assumed to be -1, 0 or 1 to match the SHUFPD lo/hi pattern.
9931
+ static unsigned getSHUFPDImm(ArrayRef<int> Mask) {
9932
+ assert((Mask.size() == 2 || Mask.size() == 4 || Mask.size() == 8) &&
9933
+ "Unexpected SHUFPD mask size");
9934
+ assert(all_of(Mask, [](int M) { return -1 <= M && M <= 1; }) &&
9935
+ "Unexpected SHUFPD mask elements");
9936
+
9937
+ // If the mask only uses one non-undef element, then fully 'splat' it to
9938
+ // improve later broadcast matching.
9939
+ int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
9940
+ assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask");
9941
+
9942
+ int FirstElt = Mask[FirstIndex];
9943
+ if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }) &&
9944
+ count_if(Mask, [FirstElt](int M) { return M == FirstElt; }) > 1) {
9945
+ unsigned Imm = 0;
9946
+ for (unsigned I = 0, E = Mask.size(); I != E; ++I)
9947
+ Imm |= FirstElt << I;
9948
+ return Imm;
9949
+ }
9950
+
9951
+ // Attempt to keep any undef elements in place to improve chances of the
9952
+ // shuffle becoming a (commutative) blend.
9953
+ unsigned Imm = 0;
9954
+ for (unsigned I = 0, E = Mask.size(); I != E; ++I)
9955
+ Imm |= (Mask[I] < 0 ? (I & 1) : Mask[I]) << I;
9956
+
9957
+ return Imm;
9958
+ }
9959
+
9960
+ static SDValue getSHUFPDImmForMask(ArrayRef<int> Mask, const SDLoc &DL,
9961
+ SelectionDAG &DAG) {
9962
+ return DAG.getTargetConstant(getSHUFPDImm(Mask), DL, MVT::i8);
9963
+ }
9964
+
9929
9965
// The Shuffle result is as follows:
9930
9966
// 0*a[0]0*a[1]...0*a[n], n >= 0, where the a[] elements are in ascending order.
9931
9967
// Each Zeroable element corresponds to a particular Mask element.
@@ -14871,7 +14907,7 @@ static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,
14871
14907
14872
14908
int LHSMask[4] = {-1, -1, -1, -1};
14873
14909
int RHSMask[4] = {-1, -1, -1, -1};
14874
- unsigned SHUFPMask = 0 ;
14910
+ int SHUFPDMask[4] = {-1, -1, -1, -1} ;
14875
14911
14876
14912
// As SHUFPD uses a single LHS/RHS element per lane, we can always
14877
14913
// perform the shuffle once the lanes have been shuffled in place.
@@ -14882,13 +14918,13 @@ static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,
14882
14918
int LaneBase = i & ~1;
14883
14919
auto &LaneMask = (i & 1) ? RHSMask : LHSMask;
14884
14920
LaneMask[LaneBase + (M & 1)] = M;
14885
- SHUFPMask |= ( M & 1) << i ;
14921
+ SHUFPDMask[i] = M & 1;
14886
14922
}
14887
14923
14888
14924
SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask);
14889
14925
SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask);
14890
14926
return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS,
14891
- DAG.getTargetConstant(SHUFPMask , DL, MVT::i8 ));
14927
+ getSHUFPDImmForMask(SHUFPDMask , DL, DAG ));
14892
14928
}
14893
14929
14894
14930
/// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -15800,9 +15836,9 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
15800
15836
15801
15837
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
15802
15838
// Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
15803
- ShuffleImm = 0 ;
15804
- bool ShufpdMask = true;
15805
- bool CommutableMask = true ;
15839
+ bool IsSHUFPD = true ;
15840
+ bool IsCommutable = true;
15841
+ SmallVector<int, 8> SHUFPDMask(NumElts, -1) ;
15806
15842
for (int i = 0; i < NumElts; ++i) {
15807
15843
if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
15808
15844
continue;
@@ -15811,20 +15847,21 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
15811
15847
int Val = (i & 6) + NumElts * (i & 1);
15812
15848
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
15813
15849
if (Mask[i] < Val || Mask[i] > Val + 1)
15814
- ShufpdMask = false;
15850
+ IsSHUFPD = false;
15815
15851
if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
15816
- CommutableMask = false;
15817
- ShuffleImm |= ( Mask[i] % 2) << i ;
15852
+ IsCommutable = false;
15853
+ SHUFPDMask[i] = Mask[i] % 2;
15818
15854
}
15819
15855
15820
- if (!ShufpdMask && !CommutableMask )
15856
+ if (!IsSHUFPD && !IsCommutable )
15821
15857
return false;
15822
15858
15823
- if (!ShufpdMask && CommutableMask )
15859
+ if (!IsSHUFPD && IsCommutable )
15824
15860
std::swap(V1, V2);
15825
15861
15826
15862
ForceV1Zero = ZeroLane[0];
15827
15863
ForceV2Zero = ZeroLane[1];
15864
+ ShuffleImm = getSHUFPDImm(SHUFPDMask);
15828
15865
return true;
15829
15866
}
15830
15867
0 commit comments