@@ -10930,7 +10930,7 @@ static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask,
10930
10930
///
10931
10931
/// SM_SentinelZero is accepted as a valid negative index but must match in
10932
10932
/// both.
10933
- static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
10933
+ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
10934
10934
ArrayRef<int> ExpectedMask,
10935
10935
SDValue V1 = SDValue(),
10936
10936
SDValue V2 = SDValue()) {
@@ -10944,6 +10944,12 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
10944
10944
if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
10945
10945
return false;
10946
10946
10947
+ // Don't use V1/V2 if they're not the same size as the shuffle mask type.
10948
+ if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
10949
+ V1 = SDValue();
10950
+ if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
10951
+ V2 = SDValue();
10952
+
10947
10953
for (int i = 0; i < Size; ++i) {
10948
10954
int MaskIdx = Mask[i];
10949
10955
int ExpectedIdx = ExpectedMask[i];
@@ -11002,8 +11008,8 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
11002
11008
SmallVector<int, 8> Unpckhwd;
11003
11009
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
11004
11010
/* Unary = */ false);
11005
- bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
11006
- isTargetShuffleEquivalent(Mask, Unpckhwd));
11011
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) ||
11012
+ isTargetShuffleEquivalent(VT, Mask, Unpckhwd));
11007
11013
return IsUnpackwdMask;
11008
11014
}
11009
11015
@@ -11020,8 +11026,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
11020
11026
for (unsigned i = 0; i != 4; ++i) {
11021
11027
SmallVector<int, 16> UnpackMask;
11022
11028
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
11023
- if (isTargetShuffleEquivalent(Mask, UnpackMask) ||
11024
- isTargetShuffleEquivalent(CommutedMask, UnpackMask))
11029
+ if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) ||
11030
+ isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask))
11025
11031
return true;
11026
11032
}
11027
11033
return false;
@@ -11214,15 +11220,15 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
11214
11220
// Attempt to match the target mask against the unpack lo/hi mask patterns.
11215
11221
SmallVector<int, 64> Unpckl, Unpckh;
11216
11222
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
11217
- if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
11223
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
11218
11224
UnpackOpcode = X86ISD::UNPCKL;
11219
11225
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
11220
11226
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
11221
11227
return true;
11222
11228
}
11223
11229
11224
11230
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
11225
- if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
11231
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
11226
11232
UnpackOpcode = X86ISD::UNPCKH;
11227
11233
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
11228
11234
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -11260,14 +11266,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
11260
11266
// If a binary shuffle, commute and try again.
11261
11267
if (!IsUnary) {
11262
11268
ShuffleVectorSDNode::commuteMask(Unpckl);
11263
- if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
11269
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
11264
11270
UnpackOpcode = X86ISD::UNPCKL;
11265
11271
std::swap(V1, V2);
11266
11272
return true;
11267
11273
}
11268
11274
11269
11275
ShuffleVectorSDNode::commuteMask(Unpckh);
11270
- if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
11276
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
11271
11277
UnpackOpcode = X86ISD::UNPCKH;
11272
11278
std::swap(V1, V2);
11273
11279
return true;
@@ -11638,14 +11644,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
11638
11644
// Try binary shuffle.
11639
11645
SmallVector<int, 32> BinaryMask;
11640
11646
createPackShuffleMask(VT, BinaryMask, false, NumStages);
11641
- if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
11647
+ if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
11642
11648
if (MatchPACK(V1, V2, PackVT))
11643
11649
return true;
11644
11650
11645
11651
// Try unary shuffle.
11646
11652
SmallVector<int, 32> UnaryMask;
11647
11653
createPackShuffleMask(VT, UnaryMask, true, NumStages);
11648
- if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
11654
+ if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
11649
11655
if (MatchPACK(V1, V1, PackVT))
11650
11656
return true;
11651
11657
}
@@ -34522,17 +34528,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
34522
34528
// instructions are no slower than UNPCKLPD but has the option to
34523
34529
// fold the input operand into even an unaligned memory load.
34524
34530
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
34525
- if (isTargetShuffleEquivalent(Mask, {0, 0}, V1)) {
34531
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
34526
34532
Shuffle = X86ISD::MOVDDUP;
34527
34533
SrcVT = DstVT = MVT::v2f64;
34528
34534
return true;
34529
34535
}
34530
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}, V1)) {
34536
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
34531
34537
Shuffle = X86ISD::MOVSLDUP;
34532
34538
SrcVT = DstVT = MVT::v4f32;
34533
34539
return true;
34534
34540
}
34535
- if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3}, V1)) {
34541
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
34536
34542
Shuffle = X86ISD::MOVSHDUP;
34537
34543
SrcVT = DstVT = MVT::v4f32;
34538
34544
return true;
@@ -34541,17 +34547,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
34541
34547
34542
34548
if (MaskVT.is256BitVector() && AllowFloatDomain) {
34543
34549
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
34544
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}, V1)) {
34550
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
34545
34551
Shuffle = X86ISD::MOVDDUP;
34546
34552
SrcVT = DstVT = MVT::v4f64;
34547
34553
return true;
34548
34554
}
34549
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
34555
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
34550
34556
Shuffle = X86ISD::MOVSLDUP;
34551
34557
SrcVT = DstVT = MVT::v8f32;
34552
34558
return true;
34553
34559
}
34554
- if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
34560
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
34555
34561
Shuffle = X86ISD::MOVSHDUP;
34556
34562
SrcVT = DstVT = MVT::v8f32;
34557
34563
return true;
@@ -34561,19 +34567,21 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
34561
34567
if (MaskVT.is512BitVector() && AllowFloatDomain) {
34562
34568
assert(Subtarget.hasAVX512() &&
34563
34569
"AVX512 required for 512-bit vector shuffles");
34564
- if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
34570
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
34565
34571
Shuffle = X86ISD::MOVDDUP;
34566
34572
SrcVT = DstVT = MVT::v8f64;
34567
34573
return true;
34568
34574
}
34569
34575
if (isTargetShuffleEquivalent(
34570
- Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
34576
+ MaskVT, Mask,
34577
+ {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
34571
34578
Shuffle = X86ISD::MOVSLDUP;
34572
34579
SrcVT = DstVT = MVT::v16f32;
34573
34580
return true;
34574
34581
}
34575
34582
if (isTargetShuffleEquivalent(
34576
- Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
34583
+ MaskVT, Mask,
34584
+ {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
34577
34585
Shuffle = X86ISD::MOVSHDUP;
34578
34586
SrcVT = DstVT = MVT::v16f32;
34579
34587
return true;
@@ -34732,27 +34740,27 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
34732
34740
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
34733
34741
34734
34742
if (MaskVT.is128BitVector()) {
34735
- if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
34743
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) {
34736
34744
V2 = V1;
34737
34745
V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
34738
34746
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
34739
34747
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
34740
34748
return true;
34741
34749
}
34742
- if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
34750
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) {
34743
34751
V2 = V1;
34744
34752
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
34745
34753
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
34746
34754
return true;
34747
34755
}
34748
- if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2( ) &&
34749
- (AllowFloatDomain || !Subtarget.hasSSE41())) {
34756
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) &&
34757
+ Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
34750
34758
std::swap(V1, V2);
34751
34759
Shuffle = X86ISD::MOVSD;
34752
34760
SrcVT = DstVT = MVT::v2f64;
34753
34761
return true;
34754
34762
}
34755
- if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
34763
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) &&
34756
34764
(AllowFloatDomain || !Subtarget.hasSSE41())) {
34757
34765
Shuffle = X86ISD::MOVSS;
34758
34766
SrcVT = DstVT = MVT::v4f32;
@@ -35325,7 +35333,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
35325
35333
// from a scalar.
35326
35334
// TODO: Handle other insertions here as well?
35327
35335
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
35328
- Subtarget.hasSSE41() && !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) {
35336
+ Subtarget.hasSSE41() &&
35337
+ !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) {
35329
35338
if (MaskEltSizeInBits == 32) {
35330
35339
SDValue SrcV1 = V1, SrcV2 = V2;
35331
35340
if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask,
@@ -35340,7 +35349,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
35340
35349
return DAG.getBitcast(RootVT, Res);
35341
35350
}
35342
35351
}
35343
- if (MaskEltSizeInBits == 64 && isTargetShuffleEquivalent(Mask, {0, 2}) &&
35352
+ if (MaskEltSizeInBits == 64 &&
35353
+ isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) &&
35344
35354
V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
35345
35355
V2.getScalarValueSizeInBits() <= 32) {
35346
35356
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
0 commit comments