@@ -259,6 +259,61 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
259
259
ret void
260
260
}
261
261
262
+ ; TODO: Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
+ ; The IR below builds a 32-lane byte mask, applies it as two independent 16-lane selects
+ ; (low and high halves), and concatenates the two results back into one 256-bit value.
+ ; The AVX2 checks below still show two xmm vpblendvb plus a vinserti128 for this pattern.
263
+ define <4 x i64 > @vselect_concat_split_v16i8 (<4 x i64 > %a , <4 x i64 > %b , <4 x i64 > %c , <4 x i64 > %d ) {
264
+ ; AVX1-LABEL: vselect_concat_split_v16i8:
265
+ ; AVX1: ## %bb.0:
266
+ ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
267
+ ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
268
+ ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
269
+ ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
270
+ ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm2
271
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
272
+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
273
+ ; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
274
+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
275
+ ; AVX1-NEXT: retq
276
+ ;
277
+ ; AVX2-LABEL: vselect_concat_split_v16i8:
278
+ ; AVX2: ## %bb.0:
279
+ ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
280
+ ; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm3
281
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
282
+ ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
283
+ ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
284
+ ; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
285
+ ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
286
+ ; AVX2-NEXT: retq
287
+ ;
288
+ ; AVX512-LABEL: vselect_concat_split_v16i8:
289
+ ; AVX512: ## %bb.0:
290
+ ; AVX512-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
291
+ ; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
292
+ ; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm4
293
+ ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 killed $ymm1 def $ymm1
294
+ ; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm2, %xmm1
295
+ ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
296
+ ; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm3, %xmm4
297
+ ; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm0
298
+ ; AVX512-NEXT: retq
299
+ ; Reinterpret all four <4 x i64> arguments as <32 x i8> byte vectors.
+ %a.bc = bitcast <4 x i64 > %a to <32 x i8 >
300
+ %b.bc = bitcast <4 x i64 > %b to <32 x i8 >
301
+ %c.bc = bitcast <4 x i64 > %c to <32 x i8 >
302
+ %d.bc = bitcast <4 x i64 > %d to <32 x i8 >
303
+ ; Per-byte signed less-than mask (%c < %d) across all 32 lanes.
+ %cmp = icmp slt <32 x i8 > %c.bc , %d.bc
304
+ ; Low half: extract lanes 0-15 of %a, %b and the mask, then take %b where the mask is set, else %a.
+ %a.lo = shufflevector <32 x i8 > %a.bc , <32 x i8 > poison, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
305
+ %b.lo = shufflevector <32 x i8 > %b.bc , <32 x i8 > poison, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
306
+ %cmp.lo = shufflevector <32 x i1 > %cmp , <32 x i1 > poison, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
307
+ %lo = select <16 x i1 > %cmp.lo , <16 x i8 > %b.lo , <16 x i8 > %a.lo
308
+ ; High half: the same selection applied to lanes 16-31.
+ %a.hi = shufflevector <32 x i8 > %a.bc , <32 x i8 > poison, <16 x i32 > <i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
309
+ %b.hi = shufflevector <32 x i8 > %b.bc , <32 x i8 > poison, <16 x i32 > <i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
310
+ %cmp.hi = shufflevector <32 x i1 > %cmp , <32 x i1 > poison, <16 x i32 > <i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
311
+ %hi = select <16 x i1 > %cmp.hi , <16 x i8 > %b.hi , <16 x i8 > %a.hi
312
+ ; Rejoin the two 16-byte results (%lo in lanes 0-15, %hi in lanes 16-31) and return them as <4 x i64>.
+ %concat = shufflevector <16 x i8 > %lo , <16 x i8 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
313
+ %result = bitcast <32 x i8 > %concat to <4 x i64 >
314
+ ret <4 x i64 > %result
315
+ }
316
+
262
317
; Regression test for rGea8fb3b60196
263
318
define void @vselect_concat () {
264
319
; AVX-LABEL: vselect_concat:
0 commit comments