Skip to content

Commit 5636eb8

Browse files
committed
[X86] combineBlendOfPermutes - allow whole-lane permutation on AVX1 targets.
Commit dd4bf22 fixed #91433, but it also meant we could no longer use vperm2f128 to permute entire 128-bit lanes on AVX1 targets. If the new 256-bit permutation mask can be scaled to 2 x 128-bit elements, then we can still fold.
1 parent a617190 commit 5636eb8

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40161,9 +40161,12 @@ combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
4016140161
return SDValue();
4016240162
}
4016340163

40164-
// Don't introduce lane-crossing permutes without AVX2.
40164+
// Don't introduce lane-crossing permutes without AVX2, unless it can be
40165+
// widened to a lane permute (vperm2f128).
4016540166
if (VT.is256BitVector() && !Subtarget.hasAVX2() &&
40166-
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), NewPermuteMask))
40167+
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),
40168+
NewPermuteMask) &&
40169+
!canScaleShuffleElements(NewPermuteMask, 2))
4016740170
return SDValue();
4016840171

4016940172
SDValue NewBlend =

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -308,9 +308,8 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
308308
define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) {
309309
; AVX1-LABEL: combine_blend_of_permutes_v8i32:
310310
; AVX1: # %bb.0:
311+
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
311312
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
312-
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
313-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
314313
; AVX1-NEXT: ret{{[l|q]}}
315314
;
316315
; AVX2-LABEL: combine_blend_of_permutes_v8i32:

0 commit comments

Comments
 (0)