@@ -328,3 +328,161 @@ entry:
328
328
%Y = zext <4 x i32 > %X to <4 x i64 >
329
329
ret <4 x i64 >%Y
330
330
}
331
+
332
; Zero-extension of <8 x i16> to <8 x i32> written as a shuffle-with-zero
; (lane i interleaved with the zero vector's lane 8) followed by a bitcast.
; Checks the lowering per subtarget: SSE2/SSSE3 use punpck{l,h}wd against a
; zeroed register, SSE4.1 uses pmovzxwd for the low half, AVX1 builds the two
; 128-bit halves and inserts, AVX2 unpacks against a broadcast zero.
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  ; Interleave A's 8 elements with element 8 of the zero operand, then
  ; reinterpret the <16 x i16> result as <8 x i32> (i.e. a zext in disguise).
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
383
+
384
; Zero-extension of <4 x i32> to <4 x i64> written as a shuffle-with-zero
; (lane i interleaved with the zero vector's lane 4) followed by a bitcast.
; SSE2/SSSE3 use punpck{l,h}dq against a zeroed register, SSE4.1 uses
; pmovzxdq for the low half, AVX1 builds halves with shufps/blendpd, and
; AVX2 uses the 256-bit vpmovzxdq plus a zero blend.
define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
  ; Interleave A's 4 elements with element 4 of the zero operand, then
  ; reinterpret the <8 x i32> result as <4 x i64> (i.e. a zext in disguise).
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
435
+
436
; Zero-extension of <8 x i8> to <8 x i32> written as a shuffle-with-zero
; (each source byte followed by three zero bytes) followed by a bitcast.
; <8 x i8> is not a legal type, so the input first gets packed down to bytes
; (pand+packuswb on SSE2, pshufb on SSSE3+) before the widening unpacks;
; AVX2 can do the whole widening with a single 256-bit vpmovzxbd.
define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2-LABEL: shuf_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand .LCPI9_0(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT: pandn %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
entry:
  ; Each source byte is followed by three copies of element 8 (zero) of the
  ; zero operand, then the <32 x i8> result is reinterpreted as <8 x i32>.
  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
  %Z = bitcast <32 x i8> %B to <8 x i32>
  ret <8 x i32> %Z
}
0 commit comments