@@ -385,6 +385,98 @@ define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
ret <8 x i16> %2
}

+ ;
+ ; TODO: Failure to merge and(ext(and(x,c1)),c2) if one gets folded to a shuffle clear mask
+ ;
+
+ define <8 x i32> @clear_sext_and(<8 x i16> %x) {
+ ; SSE-LABEL: clear_sext_and:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+ ; SSE-NEXT: pmovsxwd %xmm1, %xmm1
+ ; SSE-NEXT: pmovsxwd %xmm0, %xmm0
+ ; SSE-NEXT: pxor %xmm2, %xmm2
+ ; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+ ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3,4,5,6,7]
+ ; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: clear_sext_and:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+ ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+ ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: clear_sext_and:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: clear_sext_and:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+ ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX512-NEXT: retq
+ %1 = and <8 x i16> %x, <i16 -1, i16 3, i16 7, i16 15, i16 31, i16 63, i16 127, i16 -1>
+ %2 = sext <8 x i16> %1 to <8 x i32>
+ %3 = and <8 x i32> %2, <i32 -1, i32 0, i32 -1, i32 0, i32 0, i32 -1, i32 -1, i32 -1>
+ ret <8 x i32> %3
+ }
+
+ define <8 x i32> @clear_zext_and(<8 x i16> %x) {
+ ; SSE-LABEL: clear_zext_and:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; SSE-NEXT: pxor %xmm1, %xmm1
+ ; SSE-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+ ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+ ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+ ; SSE-NEXT: movdqa %xmm2, %xmm0
+ ; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: clear_zext_and:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: clear_zext_and:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: clear_zext_and:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+ ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6,7]
+ ; AVX512-NEXT: retq
+ %1 = and <8 x i16> %x, <i16 -1, i16 3, i16 7, i16 15, i16 31, i16 63, i16 127, i16 -1>
+ %2 = zext <8 x i16> %1 to <8 x i32>
+ %3 = and <8 x i32> %2, <i32 -1, i32 0, i32 -1, i32 0, i32 0, i32 -1, i32 -1, i32 -1>
+ ret <8 x i32> %3
+ }
+
;
; known bits folding
;
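For illustration only (not part of the diff above): the merge the TODO asks for is possible because sign-extension distributes over the inner and, so the two masks in @clear_sext_and could in principle collapse into one i32 mask applied after the extension. A minimal sketch of that merged form, with a hypothetical function name and a constant derived from the masks in the tests above; the exact form LLVM would canonicalize to may differ:

define <8 x i32> @clear_sext_and_merged(<8 x i16> %x) {
  ; hypothetical merged form: extend first, then apply a single combined mask
  ; combined mask = sext(<i16 -1,3,7,15,31,63,127,-1>) & <i32 -1,0,-1,0,0,-1,-1,-1>
  %ext = sext <8 x i16> %x to <8 x i32>
  %masked = and <8 x i32> %ext, <i32 -1, i32 0, i32 7, i32 0, i32 0, i32 63, i32 127, i32 -1>
  ret <8 x i32> %masked
}

The zext case merges the same way, except the lane-0 and lane-7 constants become 65535 (zext of i16 -1) instead of -1.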