@@ -98,6 +98,61 @@ define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
98
98
}
99
99
declare <4 x i32 > @llvm.fshl.v4i32 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
100
100
101
; concat2_permw_v8i16: permutes %x and %y independently with
; llvm.x86.avx512.permvar.hi.128 (each call uses a constant index vector) and
; then concatenates the two <8 x i16> results into a single <16 x i16> with an
; identity shufflevector (mask 0..15).
; The assertions below expect one two-source vpermt2w on ymm registers. Its
; index constant is the %x index vector followed by the %y index vector with 16
; added to every element: 5,2,4,3,7,0,6,1 -> 21,18,20,19,23,16,22,17.
+ define <16 x i16 > @concat2_permw_v8i16 (<8 x i16 > %x , <8 x i16 > %y ) nounwind {
102
+ ; CHECK-LABEL: concat2_permw_v8i16:
103
+ ; CHECK: # %bb.0:
104
+ ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
105
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
106
+ ; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
107
+ ; CHECK-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
108
+ ; CHECK-NEXT: ret{{[l|q]}}
109
+ %lo = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %x , <8 x i16 > <i16 7 , i16 0 , i16 6 , i16 1 , i16 5 , i16 2 , i16 4 , i16 3 >)
110
+ %hi = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %y , <8 x i16 > <i16 5 , i16 2 , i16 4 , i16 3 , i16 7 , i16 0 , i16 6 , i16 1 >)
111
; Identity mask 0..15: elements 0-7 come from %lo, elements 8-15 from %hi.
+ %res = shufflevector <8 x i16 > %lo , <8 x i16 > %hi , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
112
+ ret <16 x i16 > %res
113
+ }
114
+
115
; concat4_permw_v8i16: permutes %x, %y, %z and %w independently with
; llvm.x86.avx512.permvar.hi.128 (four different constant index vectors),
; concatenates the results pairwise into two <16 x i16> halves, and then joins
; those halves into one <32 x i16> with identity shufflevectors.
; Both assertion prefixes expect two two-source vpermi2w instructions on ymm
; registers (one for the %x/%y pair, one for the %z/%w pair) joined by
; vinserti64x4. Each index constant is the first source's indices followed by
; the second source's indices plus 16:
;   %x/%y: 7,0,6,1,5,2,4,3 then 5,2,4,3,7,0,6,1 + 16
;   %z/%w: 6,1,7,0,4,3,5,2 then 4,3,5,2,6,1,7,0 + 16
; The X86 assertions additionally set up an %ebp frame and load one operand
; from 8(%ebp) into xmm3 -- presumably %w passed on the stack for the 32-bit
; RUN line; the RUN lines are not visible here, TODO confirm.
+ define <32 x i16 > @concat4_permw_v8i16 (<8 x i16 > %x , <8 x i16 > %y , <8 x i16 > %z , <8 x i16 > %w ) nounwind {
116
+ ; X86-LABEL: concat4_permw_v8i16:
117
+ ; X86: # %bb.0:
118
+ ; X86-NEXT: pushl %ebp
119
+ ; X86-NEXT: movl %esp, %ebp
120
+ ; X86-NEXT: andl $-16, %esp
121
+ ; X86-NEXT: subl $16, %esp
122
+ ; X86-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
123
+ ; X86-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
124
+ ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
125
+ ; X86-NEXT: vmovdqa 8(%ebp), %xmm3
126
+ ; X86-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
127
+ ; X86-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
128
+ ; X86-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
129
+ ; X86-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
130
+ ; X86-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
131
+ ; X86-NEXT: movl %ebp, %esp
132
+ ; X86-NEXT: popl %ebp
133
+ ; X86-NEXT: retl
134
+ ;
135
+ ; X64-LABEL: concat4_permw_v8i16:
136
+ ; X64: # %bb.0:
137
+ ; X64-NEXT: # kill: def $xmm3 killed $xmm3 def $ymm3
138
+ ; X64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
139
+ ; X64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
140
+ ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
141
+ ; X64-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
142
+ ; X64-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
143
+ ; X64-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
144
+ ; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
145
+ ; X64-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
146
+ ; X64-NEXT: retq
147
+ %px = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %x , <8 x i16 > <i16 7 , i16 0 , i16 6 , i16 1 , i16 5 , i16 2 , i16 4 , i16 3 >)
148
+ %py = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %y , <8 x i16 > <i16 5 , i16 2 , i16 4 , i16 3 , i16 7 , i16 0 , i16 6 , i16 1 >)
149
+ %pz = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %z , <8 x i16 > <i16 6 , i16 1 , i16 7 , i16 0 , i16 4 , i16 3 , i16 5 , i16 2 >)
150
+ %pw = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %w , <8 x i16 > <i16 4 , i16 3 , i16 5 , i16 2 , i16 6 , i16 1 , i16 7 , i16 0 >)
151
; Identity masks throughout: %lo = %px ++ %py, %hi = %pz ++ %pw, %res = %lo ++ %hi.
+ %lo = shufflevector <8 x i16 > %px , <8 x i16 > %py , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
152
+ %hi = shufflevector <8 x i16 > %pz , <8 x i16 > %pw , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
153
+ %res = shufflevector <16 x i16 > %lo , <16 x i16 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
154
+ ret <32 x i16 > %res
155
+ }
101
156
102
157
define <8 x i32 > @concat_vrotli_v4i32 (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
103
158
; CHECK-LABEL: concat_vrotli_v4i32:
@@ -204,11 +259,11 @@ define i64 @PR55050() {
204
259
; X86-NEXT: xorl %edx, %edx
205
260
; X86-NEXT: xorl %eax, %eax
206
261
; X86-NEXT: testb %dl, %dl
207
- ; X86-NEXT: jne .LBB12_2
262
+ ; X86-NEXT: jne .LBB14_2
208
263
; X86-NEXT: # %bb.1: # %if
209
264
; X86-NEXT: xorl %eax, %eax
210
265
; X86-NEXT: xorl %edx, %edx
211
- ; X86-NEXT: .LBB12_2 : # %exit
266
+ ; X86-NEXT: .LBB14_2 : # %exit
212
267
; X86-NEXT: retl
213
268
;
214
269
; X64-LABEL: PR55050:
0 commit comments