Skip to content

Commit 2426ac6

Browse files
committed
[X86] Add demanded elts for v8f32 VPERMV node
Based off #133923 - test to ensure the VPERMV node is simplified, as only the lower 128-bit source elements are demanded.
1 parent 54385f5 commit 2426ac6

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,26 @@ define <32 x i8> @concat_pshufb_unnecessary(<16 x i8> %a0, <16 x i8> %a1, <16 x
846846
ret <32 x i8> %res
847847
}
848848

849+
; Demanded-elements test for a v8f32 variable permute (llvm.x86.avx2.permps).
; The shufflevector below keeps only elements 0-3 of the permute result and
; takes elements 4-7 of the final vector from %a1. The permute indices for
; those four kept lanes are 3,1,1,0, so only the low four elements of %a0
; contribute to the returned value.
define <8 x float> @demandedelts_vpermps(<8 x float> %a0, <8 x float> %a1) {
850+
; AVX2-LABEL: demandedelts_vpermps:
851+
; AVX2: # %bb.0:
852+
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,1,1,0]
853+
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
854+
; AVX2-NEXT: ret{{[l|q]}}
855+
;
856+
; AVX512-LABEL: demandedelts_vpermps:
857+
; AVX512: # %bb.0:
858+
; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
859+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
860+
; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm2 = [3,1,1,0,20,21,22,23]
861+
; AVX512-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
862+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
863+
; AVX512-NEXT: ret{{[l|q]}}
864+
; Permute %a0 with a constant index vector; the upper four indices (0,0,7,7)
; produce lanes that the following shuffle never selects.
%lo = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 3, i32 1, i32 1, i32 0, i32 0, i32 0, i32 7, i32 7>)
865+
; Mask 0-3 selects %lo[0..3]; mask 12-15 selects %a1[4..7].
%hi = shufflevector <8 x float> %lo, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
866+
ret <8 x float> %hi
867+
}
868+
849869
define <8 x i32> @constant_fold_permd() {
850870
; AVX2-LABEL: constant_fold_permd:
851871
; AVX2: # %bb.0:

0 commit comments

Comments
 (0)