@@ -5324,15 +5324,18 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
5324
5324
5325
5325
// NOTE(review): this span is a rendered diff hunk, so two versions of the
// function body appear interleaved. Lines starting with '-' are the removed
// implementation, lines starting with '+' are its replacement, and the bare
// numbers are the old/new line numbers of the diff view.
//
// canWidenShuffleElements (overload taking Zeroable): makes a local copy of
// Mask in which zeroable elements are replaced by SM_SentinelZero, forwards
// that copy together with WidenedMask to the two-argument
// canWidenShuffleElements overload, and returns that overload's result.
//
// Parameters:
//   Mask        - shuffle mask to inspect; entries equal to SM_SentinelUndef
//                 are never rewritten, in either version of the body.
//   Zeroable    - per-element bits, indexed with the same index as Mask.
//   V2IsZero    - added by this change; when false the mask copy is forwarded
//                 unmodified.
//   WidenedMask - output container handed through to the two-argument
//                 overload.
//
// What the change does:
//   * The removed body rewrote every non-undef element whose Zeroable bit is
//     set, unconditionally.
//   * The added body performs that rewrite only when V2IsZero is true, and in
//     that case first asserts that Zeroable is not the null (all-clear) value.
//   * The local copy's inline capacity grows from 32 to 64 entries and the
//     loop bound is taken from Mask instead of from the copy.
static bool canWidenShuffleElements(ArrayRef<int> Mask,
5326
5326
const APInt &Zeroable,
5327
+ bool V2IsZero,
5327
5328
SmallVectorImpl<int> &WidenedMask) {
5328
- SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5329
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5330
- if (TargetMask[i] == SM_SentinelUndef)
5331
- continue;
5332
- if (Zeroable[i])
5333
- TargetMask[i] = SM_SentinelZero;
5329
+ // Create an alternative mask with info about zeroable elements.
5330
+ // Here we do not set undef elements as zeroable.
5331
+ SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5332
+ if (V2IsZero) {
5333
+ assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
5334
+ for (int i = 0, Size = Mask.size(); i != Size; ++i)
5335
+ if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5336
+ ZeroableMask[i] = SM_SentinelZero;
5334
5337
}
5335
- return canWidenShuffleElements(TargetMask , WidenedMask);
5338
+ return canWidenShuffleElements(ZeroableMask , WidenedMask);
5336
5339
}
5337
5340
5338
5341
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
@@ -14817,8 +14820,10 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
14817
14820
if (Subtarget.hasAVX2() && V2.isUndef())
14818
14821
return SDValue();
14819
14822
14823
+ bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());
14824
+
14820
14825
SmallVector<int, 4> WidenedMask;
14821
- if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
14826
+ if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
14822
14827
return SDValue();
14823
14828
14824
14829
bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -17095,23 +17100,13 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
17095
17100
17096
17101
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
17097
17102
17098
- // Create an alternative mask with info about zeroable elements.
17099
- // Here we do not set undef elements as zeroable.
17100
- SmallVector<int, 64> ZeroableMask(OrigMask.begin(), OrigMask.end());
17101
- if (V2IsZero) {
17102
- assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
17103
- for (int i = 0; i != NumElements; ++i)
17104
- if (OrigMask[i] != SM_SentinelUndef && Zeroable[i])
17105
- ZeroableMask[i] = SM_SentinelZero;
17106
- }
17107
-
17108
17103
// Try to collapse shuffles into using a vector type with fewer elements but
17109
17104
// wider element types. We cap this to not form integers or floating point
17110
17105
// elements wider than 64 bits, but it might be interesting to form i128
17111
17106
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
17112
17107
SmallVector<int, 16> WidenedMask;
17113
17108
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
17114
- canWidenShuffleElements(ZeroableMask , WidenedMask)) {
17109
+ canWidenShuffleElements(OrigMask, Zeroable, V2IsZero , WidenedMask)) {
17115
17110
// Shuffle mask widening should not interfere with a broadcast opportunity
17116
17111
// by obfuscating the operands with bitcasts.
17117
17112
// TODO: Avoid lowering directly from this top-level function: make this
0 commit comments