Skip to content

Commit 269c40f

Browse files
committed
[X86] Add tests for concatenation of VPERMV nodes
1 parent 37c3fbf commit 269c40f

File tree

1 file changed

+57
-2
lines changed

1 file changed

+57
-2
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Lines changed: 57 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,61 @@ define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
9898
}
9999
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
100100

101+
; Applies @llvm.x86.avx512.permvar.hi.128 to %x and to %y, each with its own
; constant index vector, and concatenates the two <8 x i16> results into one
; <16 x i16> value. The FileCheck assertions below expect a single vpermt2w
; using one 16-entry index constant.
define <16 x i16> @concat2_permw_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
102+
; CHECK-LABEL: concat2_permw_v8i16:
103+
; CHECK: # %bb.0:
104+
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
105+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
106+
; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
107+
; CHECK-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
108+
; CHECK-NEXT: ret{{[l|q]}}
109+
; The expected index constant above is the %x index vector followed by the %y
; index vector with 16 added to every entry (5,2,4,3,7,0,6,1 -> 21,18,20,19,23,16,22,17).
%lo = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x, <8 x i16> <i16 7, i16 0, i16 6, i16 1, i16 5, i16 2, i16 4, i16 3>)
110+
%hi = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %y, <8 x i16> <i16 5, i16 2, i16 4, i16 3, i16 7, i16 0, i16 6, i16 1>)
111+
; Identity mask 0..15: elements of %lo followed by elements of %hi.
%res = shufflevector <8 x i16> %lo, <8 x i16> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
112+
ret <16 x i16> %res
113+
}
114+
115+
; Applies @llvm.x86.avx512.permvar.hi.128 to each of %x, %y, %z and %w with a
; distinct constant index vector, then concatenates the four <8 x i16> results
; into one <32 x i16> value in that order. The FileCheck assertions below
; expect two vpermi2w instructions (one per 256-bit half) joined by
; vinserti64x4. The 32-bit and 64-bit prefixes are asserted separately; the
; 32-bit sequence additionally sets up an aligned frame and loads the fourth
; argument from 8(%ebp).
define <32 x i16> @concat4_permw_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z, <8 x i16> %w) nounwind {
116+
; X86-LABEL: concat4_permw_v8i16:
117+
; X86: # %bb.0:
118+
; X86-NEXT: pushl %ebp
119+
; X86-NEXT: movl %esp, %ebp
120+
; X86-NEXT: andl $-16, %esp
121+
; X86-NEXT: subl $16, %esp
122+
; X86-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
123+
; X86-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
124+
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
125+
; X86-NEXT: vmovdqa 8(%ebp), %xmm3
126+
; X86-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
127+
; X86-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
128+
; X86-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
129+
; X86-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
130+
; X86-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
131+
; X86-NEXT: movl %ebp, %esp
132+
; X86-NEXT: popl %ebp
133+
; X86-NEXT: retl
134+
;
135+
; X64-LABEL: concat4_permw_v8i16:
136+
; X64: # %bb.0:
137+
; X64-NEXT: # kill: def $xmm3 killed $xmm3 def $ymm3
138+
; X64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
139+
; X64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
140+
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
141+
; X64-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
142+
; X64-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
143+
; X64-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
144+
; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
145+
; X64-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
146+
; X64-NEXT: retq
147+
; Each expected 16-entry index constant above is one pair's index vectors laid
; end to end, with 16 added to the second vector's entries: %x/%y give the
; constant placed in ymm2, %z/%w give the constant placed in ymm4.
%px = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x, <8 x i16> <i16 7, i16 0, i16 6, i16 1, i16 5, i16 2, i16 4, i16 3>)
148+
%py = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %y, <8 x i16> <i16 5, i16 2, i16 4, i16 3, i16 7, i16 0, i16 6, i16 1>)
149+
%pz = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %z, <8 x i16> <i16 6, i16 1, i16 7, i16 0, i16 4, i16 3, i16 5, i16 2>)
150+
%pw = tail call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %w, <8 x i16> <i16 4, i16 3, i16 5, i16 2, i16 6, i16 1, i16 7, i16 0>)
151+
; Identity masks 0..15: %lo is %px followed by %py, %hi is %pz followed by %pw.
%lo = shufflevector <8 x i16> %px, <8 x i16> %py, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
152+
%hi = shufflevector <8 x i16> %pz, <8 x i16> %pw, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
153+
; Identity mask 0..31: %lo followed by %hi.
%res = shufflevector <16 x i16> %lo, <16 x i16> %hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
154+
ret <32 x i16> %res
155+
}
101156

102157
define <8 x i32> @concat_vrotli_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
103158
; CHECK-LABEL: concat_vrotli_v4i32:
@@ -204,11 +259,11 @@ define i64 @PR55050() {
204259
; X86-NEXT: xorl %edx, %edx
205260
; X86-NEXT: xorl %eax, %eax
206261
; X86-NEXT: testb %dl, %dl
207-
; X86-NEXT: jne .LBB12_2
262+
; X86-NEXT: jne .LBB14_2
208263
; X86-NEXT: # %bb.1: # %if
209264
; X86-NEXT: xorl %eax, %eax
210265
; X86-NEXT: xorl %edx, %edx
211-
; X86-NEXT: .LBB12_2: # %exit
266+
; X86-NEXT: .LBB14_2: # %exit
212267
; X86-NEXT: retl
213268
;
214269
; X64-LABEL: PR55050:

0 commit comments

Comments
 (0)