@@ -582,6 +582,122 @@ define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
  ret <8 x float> %r
}

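+; Tests for FP binops where one operand is a splatted value and the other is a
+; constant (intent inferred from the test names: the splat should not block
+; folding the constant operand).
+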
+define <2 x double> @fadd_splat_const_op1_v2f64(<2 x double> %vx) {
+; SSE-LABEL: fadd_splat_const_op1_v2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT:    addpd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fadd_splat_const_op1_v2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
+  %r = fadd <2 x double> %splatx, <double 42.0, double 42.0>
+  ret <2 x double> %r
+}
+
+define <4 x double> @fsub_const_op0_splat_v4f64(double %x) {
+; SSE-LABEL: fsub_const_op0_splat_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT:    subpd %xmm0, %xmm1
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fsub_const_op0_splat_v4f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %vx = insertelement <4 x double> undef, double 8.0, i32 0
+  %vy = insertelement <4 x double> undef, double %x, i32 0
+  %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
+  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
+  %r = fsub <4 x double> %splatx, %splaty
+  ret <4 x double> %r
+}
+
+define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy) {
+; SSE-LABEL: fmul_splat_const_op1_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    mulps {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fmul_splat_const_op1_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
+  %r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0>
+  ret <4 x float> %r
+}
+
+define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) {
+; SSE-LABEL: fdiv_splat_const_op0_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    rcpps %xmm0, %xmm2
+; SSE-NEXT:    mulps %xmm2, %xmm0
+; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SSE-NEXT:    subps %xmm0, %xmm1
+; SSE-NEXT:    mulps %xmm2, %xmm1
+; SSE-NEXT:    addps %xmm2, %xmm1
+; SSE-NEXT:    mulps {{.*}}(%rip), %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fdiv_splat_const_op0_v8f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vrcpps %ymm0, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vaddps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
+  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
+  %r = fdiv fast <8 x float> %splatx, %splaty
+  ret <8 x float> %r
+}
+
+define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
+; SSE-LABEL: fdiv_const_op1_splat_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    rcpps %xmm1, %xmm1
+; SSE-NEXT:    addps %xmm1, %xmm1
+; SSE-NEXT:    mulps %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fdiv_const_op1_splat_v8f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vrcpps %ymm1, %ymm1
+; AVX-NEXT:    vaddps %ymm1, %ymm1, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
+  %splaty = shufflevector <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
+  %r = fdiv fast <8 x float> %splatx, %splaty
+  ret <8 x float> %r
+}
+
define <2 x double> @splat0_fadd_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: splat0_fadd_v2f64:
; SSE:       # %bb.0:
@@ -782,3 +898,118 @@ define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}
+
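+; Tests for splatting lane 0 of an FP binop that has a constant operand
+; (intent inferred from the test names: the binop should happen before the
+; splat).
+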
+define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) {
+; SSE-LABEL: splat0_fadd_const_op1_v2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT:    addpd %xmm0, %xmm1
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splat0_fadd_const_op1_v2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %b = fadd <2 x double> %vx, <double 42.0, double 12.0>
+  %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %r
+}
+
+define <4 x double> @splat0_fsub_const_op0_v4f64(double %x) {
+; SSE-LABEL: splat0_fsub_const_op0_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT:    subpd %xmm0, %xmm1
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splat0_fsub_const_op0_v4f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %vx = insertelement <4 x double> undef, double %x, i32 0
+  %b = fsub <4 x double> <double -42.0, double 42.0, double 0.0, double 1.0>, %vx
+  %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
+  ret <4 x double> %r
+}
+
+define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
+; SSE-LABEL: splat0_fmul_const_op1_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT:    mulps %xmm0, %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splat0_fmul_const_op1_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    retq
+  %b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
+  %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %r
+}
+
+define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) {
+; SSE-LABEL: splat0_fdiv_const_op1_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vrcpps %ymm1, %ymm1
+; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %xmm1, %xmm2, %xmm2
+; AVX-NEXT:    vmulps %xmm2, %xmm1, %xmm2
+; AVX-NEXT:    vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
+  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %r
+}
+
+define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
+; SSE-LABEL: splat0_fdiv_const_op0_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    rcpps %xmm0, %xmm2
+; SSE-NEXT:    mulps %xmm2, %xmm0
+; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SSE-NEXT:    subps %xmm0, %xmm1
+; SSE-NEXT:    mulps %xmm2, %xmm1
+; SSE-NEXT:    addps %xmm2, %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splat0_fdiv_const_op0_v8f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vrcpps %ymm0, %ymm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx
+  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %r
+}