@@ -3941,6 +3941,24 @@ static bool canScaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts) {
3941
3941
return scaleShuffleElements(Mask, NumDstElts, ScaledMask);
3942
3942
}
3943
3943
3944
+ // Helper to grow the shuffle mask for a larger value type.
3945
+ // NOTE: This is different to scaleShuffleElements which is a same size type.
3946
+ static void growShuffleMask(ArrayRef<int> SrcMask,
3947
+ SmallVectorImpl<int> &DstMask,
3948
+ unsigned SrcSizeInBits, unsigned DstSizeInBits) {
3949
+ assert(DstMask.empty() && "Expected an empty shuffle mas");
3950
+ assert((DstSizeInBits % SrcSizeInBits) == 0 && "Illegal shuffle scale");
3951
+ unsigned Scale = DstSizeInBits / SrcSizeInBits;
3952
+ unsigned NumSrcElts = SrcMask.size();
3953
+ DstMask.assign(SrcMask.begin(), SrcMask.end());
3954
+ for (int &M : DstMask) {
3955
+ if (M < 0)
3956
+ continue;
3957
+ M = (M % NumSrcElts) + ((M / NumSrcElts) * Scale * NumSrcElts);
3958
+ }
3959
+ DstMask.append((Scale - 1) * NumSrcElts, SM_SentinelUndef);
3960
+ }
3961
+
3944
3962
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
3945
3963
bool X86::isZeroNode(SDValue Elt) {
3946
3964
return isNullConstant(Elt) || isNullFPConstant(Elt);
@@ -40456,19 +40474,13 @@ static SDValue combineX86ShuffleChainWithExtract(
40456
40474
}
40457
40475
40458
40476
// Bail if we fail to find a source larger than the existing root.
40459
- unsigned Scale = WideSizeInBits / RootSizeInBits;
40460
40477
if (WideSizeInBits <= RootSizeInBits ||
40461
40478
(WideSizeInBits % RootSizeInBits) != 0)
40462
40479
return SDValue();
40463
40480
40464
40481
// Create new mask for larger type.
40465
- SmallVector<int, 64> WideMask(BaseMask);
40466
- for (int &M : WideMask) {
40467
- if (M < 0)
40468
- continue;
40469
- M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
40470
- }
40471
- WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
40482
+ SmallVector<int, 64> WideMask;
40483
+ growShuffleMask(BaseMask, WideMask, RootSizeInBits, WideSizeInBits);
40472
40484
40473
40485
// Attempt to peek through inputs and adjust mask when we extract from an
40474
40486
// upper subvector.
0 commit comments