Skip to content

Commit 35c0848

Browse files
committed
[NFC][X86] combineX86ShuffleChain(): hoist Mask variable higher up
Having `NewMask` declared outside of an `if`, and rebinding the `BaseMask` `ArrayRef` to it, is confusing. Instead, just move the `Mask` vector higher up, and change the code that earlier had no access to it — but now does — to use `Mask` instead of `BaseMask`. This has no other intentional changes.
1 parent 916cdc3 commit 35c0848

File tree

1 file changed

+29
-32
lines changed

1 file changed

+29
-32
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -35836,14 +35836,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3583635836
return CanonicalizeShuffleInput(RootVT, V1);
3583735837
}
3583835838

35839+
SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
35840+
3583935841
// Adjust mask elements that pick from a splat input to be identity mask elts,
3584035842
// i.e. to pick from the same lane of the input as the mask element is in.
3584135843
// This may allow to simplify the shuffle into a blend.
35842-
SmallVector<int> NewMask;
3584335844
if (InputIsSplat[0] || InputIsSplat[1]) {
35844-
NewMask.assign(BaseMask.begin(), BaseMask.end());
3584535845
for (unsigned i = 0; i != NumBaseMaskElts; ++i) {
35846-
int &M = NewMask[i];
35846+
int &M = Mask[i];
3584735847
assert(isUndefOrZeroOrInRange(M, 0, 2 * NumBaseMaskElts) &&
3584835848
"OOB mask element?");
3584935849
if (M < 0)
@@ -35853,16 +35853,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3585335853
if (InputIsSplat[InputIdx] && i < InputNumElts[InputIdx])
3585435854
M = i + InputIdx * NumBaseMaskElts; // Pick from the same lane of input.
3585535855
}
35856-
BaseMask = std::move(NewMask);
3585735856
}
3585835857

3585935858
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
3586035859
// etc. can be simplified.
3586135860
if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
3586235861
SmallVector<int> ScaledMask, IdentityMask;
3586335862
unsigned NumElts = VT1.getVectorNumElements();
35864-
if (BaseMask.size() <= NumElts &&
35865-
scaleShuffleElements(BaseMask, NumElts, ScaledMask)) {
35863+
if (Mask.size() <= NumElts &&
35864+
scaleShuffleElements(Mask, NumElts, ScaledMask)) {
3586635865
for (unsigned i = 0; i != NumElts; ++i)
3586735866
IdentityMask.push_back(i);
3586835867
if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
@@ -35876,22 +35875,22 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3587635875
// If the upper subvectors are zeroable, then an extract+insert is more
3587735876
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
3587835877
// to zero the upper subvectors.
35879-
if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
35878+
if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
3588035879
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
3588135880
return SDValue(); // Nothing to do!
35882-
assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
35881+
assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
3588335882
"Unexpected lane shuffle");
3588435883
Res = CanonicalizeShuffleInput(RootVT, V1);
35885-
unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
35886-
bool UseZero = isAnyZero(BaseMask);
35884+
unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
35885+
bool UseZero = isAnyZero(Mask);
3588735886
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
3588835887
return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
3588935888
}
3589035889

3589135890
// Narrow shuffle mask to v4x128.
3589235891
SmallVector<int, 4> ScaledMask;
3589335892
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
35894-
narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, ScaledMask);
35893+
narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
3589535894

3589635895
// Try to lower to vshuf64x2/vshuf32x4.
3589735896
auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
@@ -35950,20 +35949,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3595035949
// If the upper half is zeroable, then an extract+insert is more optimal
3595135950
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
3595235951
// zero the upper half.
35953-
if (isUndefOrZero(BaseMask[1])) {
35952+
if (isUndefOrZero(Mask[1])) {
3595435953
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
3595535954
return SDValue(); // Nothing to do!
35956-
assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
35955+
assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
3595735956
Res = CanonicalizeShuffleInput(RootVT, V1);
35958-
Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
35959-
return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
35960-
DL, 256);
35957+
Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
35958+
return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
35959+
256);
3596135960
}
3596235961

3596335962
// If we're splatting the low subvector, an insert-subvector 'concat'
3596435963
// pattern is quicker than VPERM2X128.
3596535964
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
35966-
if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) {
35965+
if (Mask[0] == 0 && Mask[1] == 0 && !Subtarget.hasAVX2()) {
3596735966
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
3596835967
return SDValue(); // Nothing to do!
3596935968
Res = CanonicalizeShuffleInput(RootVT, V1);
@@ -35978,11 +35977,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3597835977
// we need to use the zeroing feature.
3597935978
// Prefer blends for sequential shuffles unless we are optimizing for size.
3598035979
if (UnaryShuffle &&
35981-
!(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
35982-
(OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
35980+
!(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
35981+
(OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
3598335982
unsigned PermMask = 0;
35984-
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
35985-
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
35983+
PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
35984+
PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
3598635985
return DAG.getNode(
3598735986
X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
3598835987
DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
@@ -35993,16 +35992,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3599335992

3599435993
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
3599535994
if (!UnaryShuffle && !IsMaskedShuffle) {
35996-
assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
35995+
assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
3599735996
"Unexpected shuffle sentinel value");
3599835997
// Prefer blends to X86ISD::VPERM2X128.
35999-
if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
36000-
(BaseMask[0] == 2 && BaseMask[1] == 1))) {
35998+
if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
3600135999
unsigned PermMask = 0;
36002-
PermMask |= ((BaseMask[0] & 3) << 0);
36003-
PermMask |= ((BaseMask[1] & 3) << 4);
36004-
SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
36005-
SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
36000+
PermMask |= ((Mask[0] & 3) << 0);
36001+
PermMask |= ((Mask[1] & 3) << 4);
36002+
SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
36003+
SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
3600636004
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
3600736005
CanonicalizeShuffleInput(RootVT, LHS),
3600836006
CanonicalizeShuffleInput(RootVT, RHS),
@@ -36013,13 +36011,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3601336011

3601436012
// For masks that have been widened to 128-bit elements or more,
3601536013
// narrow back down to 64-bit elements.
36016-
SmallVector<int, 64> Mask;
3601736014
if (BaseMaskEltSizeInBits > 64) {
3601836015
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
3601936016
int MaskScale = BaseMaskEltSizeInBits / 64;
36020-
narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
36021-
} else {
36022-
Mask.assign(BaseMask.begin(), BaseMask.end());
36017+
SmallVector<int, 64> ScaledMask;
36018+
narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
36019+
Mask = std::move(ScaledMask);
3602336020
}
3602436021

3602536022
// For masked shuffles, we're trying to match the root width for better

0 commit comments

Comments
 (0)