Skip to content

Commit dd4bf22

Browse files
committed
[X86] combineBlendOfPermutes - don't introduce lane-crossing permutes without AVX2 support.
Fixes llvm#91433
1 parent 1a49810 commit dd4bf22

File tree

3 files changed

+36
-8
lines changed

3 files changed

+36
-8
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40078,10 +40078,10 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
4007840078

4007940079
// Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
4008040080
// iff we don't demand the same element index for both X and Y.
40081-
static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
40082-
ArrayRef<int> BlendMask,
40083-
const APInt &DemandedElts,
40084-
SelectionDAG &DAG, const SDLoc &DL) {
40081+
static SDValue
40082+
combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
40083+
const APInt &DemandedElts, SelectionDAG &DAG,
40084+
const X86Subtarget &Subtarget, const SDLoc &DL) {
4008540085
assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected");
4008640086
if (!N0.hasOneUse() || !N1.hasOneUse())
4008740087
return SDValue();
@@ -40156,6 +40156,11 @@ static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
4015640156
return SDValue();
4015740157
}
4015840158

40159+
// Don't introduce lane-crossing 256-bit permutes without AVX2 (llvm#91433).
40160+
if (VT.is256BitVector() && !Subtarget.hasAVX2() &&
40161+
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), NewPermuteMask))
40162+
return SDValue();
40163+
4015940164
SDValue NewBlend =
4016040165
DAG.getVectorShuffle(VT, DL, DAG.getBitcast(VT, Ops0[0]),
4016140166
DAG.getBitcast(VT, Ops1[0]), NewBlendMask);
@@ -41918,9 +41923,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4191841923
case X86ISD::BLENDI: {
4191941924
SmallVector<int, 16> BlendMask;
4192041925
DecodeBLENDMask(NumElts, Op.getConstantOperandVal(2), BlendMask);
41921-
if (SDValue R = combineBlendOfPermutes(VT.getSimpleVT(), Op.getOperand(0),
41922-
Op.getOperand(1), BlendMask,
41923-
DemandedElts, TLO.DAG, SDLoc(Op)))
41926+
if (SDValue R = combineBlendOfPermutes(
41927+
VT.getSimpleVT(), Op.getOperand(0), Op.getOperand(1), BlendMask,
41928+
DemandedElts, TLO.DAG, Subtarget, SDLoc(Op)))
4192441929
return TLO.CombineTo(Op, R);
4192541930
break;
4192641931
}

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,28 @@ define <4 x double> @shuffle_v4f64_0437(<4 x double> %a, <4 x double> %b) {
699699
ret <4 x double> %shuffle
700700
}
701701

702+
; PR91433 - blend-of-permutes combine must not create a lane-crossing permute without AVX2
703+
define <4 x double> @shuffle_v4f64_2303(<4 x double> %a) {
704+
; AVX1-LABEL: shuffle_v4f64_2303:
705+
; AVX1: # %bb.0:
706+
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
707+
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
708+
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
709+
; AVX1-NEXT: retq
710+
;
711+
; AVX2-LABEL: shuffle_v4f64_2303:
712+
; AVX2: # %bb.0:
713+
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
714+
; AVX2-NEXT: retq
715+
;
716+
; AVX512VL-LABEL: shuffle_v4f64_2303:
717+
; AVX512VL: # %bb.0:
718+
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
719+
; AVX512VL-NEXT: retq
720+
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
721+
ret <4 x double> %shuffle
722+
}
723+
702724
define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
703725
; ALL-LABEL: shuffle_v4f64_0z3z:
704726
; ALL: # %bb.0:

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,9 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
308308
define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) {
309309
; AVX1-LABEL: combine_blend_of_permutes_v8i32:
310310
; AVX1: # %bb.0:
311-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
312311
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
312+
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
313+
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
313314
; AVX1-NEXT: ret{{[l|q]}}
314315
;
315316
; AVX2-LABEL: combine_blend_of_permutes_v8i32:

0 commit comments

Comments
 (0)