@@ -471,7 +471,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
471
471
;
472
472
; AVX-LABEL: combine_vec_udiv_uniform:
473
473
; AVX: # %bb.0:
474
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
474
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [25645,25645,25645,25645,25645,25645,25645,25645]
475
475
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
476
476
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
477
477
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -480,7 +480,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
480
480
;
481
481
; XOP-LABEL: combine_vec_udiv_uniform:
482
482
; XOP: # %bb.0:
483
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
483
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [25645,25645,25645,25645,25645,25645,25645,25645]
484
484
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
485
485
; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0
486
486
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -500,7 +500,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
500
500
; SSE2-NEXT: psrlw $3, %xmm3
501
501
; SSE2-NEXT: pandn %xmm3, %xmm1
502
502
; SSE2-NEXT: por %xmm2, %xmm1
503
- ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
503
+ ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
504
504
; SSE2-NEXT: psubw %xmm1, %xmm0
505
505
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
506
506
; SSE2-NEXT: paddw %xmm1, %xmm0
@@ -515,7 +515,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
515
515
; SSE41-NEXT: movdqa %xmm0, %xmm1
516
516
; SSE41-NEXT: psrlw $3, %xmm1
517
517
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
518
- ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
518
+ ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
519
519
; SSE41-NEXT: psubw %xmm1, %xmm0
520
520
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
521
521
; SSE41-NEXT: paddw %xmm1, %xmm0
@@ -528,18 +528,18 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
528
528
; AVX: # %bb.0:
529
529
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm1
530
530
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
531
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
531
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
532
532
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
533
533
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
534
534
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
535
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
535
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u]
536
536
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7]
537
537
; AVX-NEXT: retq
538
538
;
539
539
; XOP-LABEL: combine_vec_udiv_nonuniform:
540
540
; XOP: # %bb.0:
541
541
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
542
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
542
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
543
543
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
544
544
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
545
545
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -558,8 +558,8 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
558
558
; SSE2-NEXT: psrlw $1, %xmm0
559
559
; SSE2-NEXT: pandn %xmm0, %xmm1
560
560
; SSE2-NEXT: por %xmm2, %xmm1
561
- ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
562
- ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
561
+ ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [16393,59919,58255,32787,55189,8197,52429,32789]
562
+ ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,2048,2048,2,2048,8,2048,2]
563
563
; SSE2-NEXT: movdqa %xmm1, %xmm0
564
564
; SSE2-NEXT: retq
565
565
;
@@ -568,22 +568,22 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
568
568
; SSE41-NEXT: movdqa %xmm0, %xmm1
569
569
; SSE41-NEXT: psrlw $1, %xmm1
570
570
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
571
- ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
572
- ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
571
+ ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
572
+ ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,2048,2048,2,2048,8,2048,2]
573
573
; SSE41-NEXT: retq
574
574
;
575
575
; AVX-LABEL: combine_vec_udiv_nonuniform2:
576
576
; AVX: # %bb.0:
577
577
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1
578
578
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
579
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
580
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
579
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
580
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [8,2048,2048,2,2048,8,2048,2]
581
581
; AVX-NEXT: retq
582
582
;
583
583
; XOP-LABEL: combine_vec_udiv_nonuniform2:
584
584
; XOP: # %bb.0:
585
585
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
586
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
586
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789]
587
587
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
588
588
; XOP-NEXT: retq
589
589
%1 = udiv <8 x i16 > %x , <i16 -34 , i16 35 , i16 36 , i16 -37 , i16 38 , i16 -39 , i16 40 , i16 -41 >
@@ -598,21 +598,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
598
598
; SSE-NEXT: psubw %xmm1, %xmm0
599
599
; SSE-NEXT: psrlw $1, %xmm0
600
600
; SSE-NEXT: paddw %xmm1, %xmm0
601
- ; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
601
+ ; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024]
602
602
; SSE-NEXT: retq
603
603
;
604
604
; AVX-LABEL: combine_vec_udiv_nonuniform3:
605
605
; AVX: # %bb.0:
606
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
606
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517]
607
607
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
608
608
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
609
609
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
610
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
610
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024]
611
611
; AVX-NEXT: retq
612
612
;
613
613
; XOP-LABEL: combine_vec_udiv_nonuniform3:
614
614
; XOP: # %bb.0:
615
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
615
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517]
616
616
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
617
617
; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0
618
618
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@@ -687,7 +687,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
687
687
; SSE2-NEXT: pmulhuw %xmm0, %xmm1
688
688
; SSE2-NEXT: movdqa %xmm0, %xmm2
689
689
; SSE2-NEXT: psubw %xmm1, %xmm2
690
- ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
690
+ ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [u,32768,0,0,0,0,0,32768]
691
691
; SSE2-NEXT: paddw %xmm1, %xmm2
692
692
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535]
693
693
; SSE2-NEXT: pandn %xmm2, %xmm1
@@ -706,7 +706,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
706
706
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
707
707
; SSE41-NEXT: movdqa %xmm0, %xmm2
708
708
; SSE41-NEXT: psubw %xmm1, %xmm2
709
- ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
709
+ ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [u,32768,0,0,0,0,0,32768]
710
710
; SSE41-NEXT: paddw %xmm1, %xmm2
711
711
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,1024,1024,16,4,1024,u,4096]
712
712
; SSE41-NEXT: pmulhuw %xmm2, %xmm1
@@ -716,20 +716,20 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
716
716
;
717
717
; AVX-LABEL: pr38477:
718
718
; AVX: # %bb.0:
719
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
719
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115]
720
720
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2
721
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
721
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768]
722
722
; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm1
723
- ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
723
+ ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,1024,1024,16,4,1024,u,4096]
724
724
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
725
725
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
726
726
; AVX-NEXT: retq
727
727
;
728
728
; XOP-LABEL: pr38477:
729
729
; XOP: # %bb.0:
730
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
730
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115]
731
731
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm2
732
- ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
732
+ ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768]
733
733
; XOP-NEXT: vpaddw %xmm1, %xmm2, %xmm1
734
734
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
735
735
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
0 commit comments