@@ -35889,22 +35889,23 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
35889
35889
}
35890
35890
35891
35891
// Narrow shuffle mask to v4x128.
35892
- SmallVector<int, 4> Mask ;
35892
+ SmallVector<int, 4> ScaledMask ;
35893
35893
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
35894
- narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask );
35894
+ narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, ScaledMask );
35895
35895
35896
35896
// Try to lower to vshuf64x2/vshuf32x4.
35897
- auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
35898
- SDValue V1, SDValue V2, SelectionDAG &DAG) {
35897
+ auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
35898
+ ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
35899
+ SelectionDAG &DAG) {
35899
35900
unsigned PermMask = 0;
35900
35901
// Ensure elements came from the same Op.
35901
35902
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
35902
35903
for (int i = 0; i < 4; ++i) {
35903
- assert(Mask [i] >= -1 && "Illegal shuffle sentinel value");
35904
- if (Mask [i] < 0)
35904
+ assert(ScaledMask [i] >= -1 && "Illegal shuffle sentinel value");
35905
+ if (ScaledMask [i] < 0)
35905
35906
continue;
35906
35907
35907
- SDValue Op = Mask [i] >= 4 ? V2 : V1;
35908
+ SDValue Op = ScaledMask [i] >= 4 ? V2 : V1;
35908
35909
unsigned OpIndex = i / 2;
35909
35910
if (Ops[OpIndex].isUndef())
35910
35911
Ops[OpIndex] = Op;
@@ -35914,7 +35915,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
35914
35915
// Convert the 128-bit shuffle mask selection values into 128-bit
35915
35916
// selection bits defined by a vshuf64x2 instruction's immediate control
35916
35917
// byte.
35917
- PermMask |= (Mask [i] % 4) << (i * 2);
35918
+ PermMask |= (ScaledMask [i] % 4) << (i * 2);
35918
35919
}
35919
35920
35920
35921
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
@@ -35926,18 +35927,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
35926
35927
// FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
35927
35928
// doesn't work because our mask is for 128 bits and we don't have an MVT
35928
35929
// to match that.
35929
- bool PreferPERMQ =
35930
- UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) &&
35931
- isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) &&
35932
- isUndefOrInRange(Mask[3], 2, 4) &&
35933
- (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) &&
35934
- (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
35935
-
35936
- if (!isAnyZero(Mask) && !PreferPERMQ) {
35930
+ bool PreferPERMQ = UnaryShuffle && isUndefOrInRange(ScaledMask[0], 0, 2) &&
35931
+ isUndefOrInRange(ScaledMask[1], 0, 2) &&
35932
+ isUndefOrInRange(ScaledMask[2], 2, 4) &&
35933
+ isUndefOrInRange(ScaledMask[3], 2, 4) &&
35934
+ (ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
35935
+ ScaledMask[0] == (ScaledMask[2] % 2)) &&
35936
+ (ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
35937
+ ScaledMask[1] == (ScaledMask[3] % 2));
35938
+
35939
+ if (!isAnyZero(ScaledMask) && !PreferPERMQ) {
35937
35940
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
35938
35941
return SDValue(); // Nothing to do!
35939
35942
MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
35940
- if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask , V1, V2, DAG))
35943
+ if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask , V1, V2, DAG))
35941
35944
return DAG.getBitcast(RootVT, V);
35942
35945
}
35943
35946
}
0 commit comments