@@ -466,6 +466,122 @@ define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) {
466
466
ret <8 x float > %r
467
467
}
468
468
469
; splat(x) + constant-splat: the splat should stay cheap (single lane op + splat).
define <2 x double> @fadd_splat_const_op1_v2f64(<2 x double> %vx) {
; SSE-LABEL: fadd_splat_const_op1_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    addpd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_const_op1_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
  %r = fadd <2 x double> %splatx, <double 42.0, double 42.0>
  ret <2 x double> %r
}
485
+
486
; constant-splat - splat(x): fsub of two splats can be done on one element, then splatted.
define <4 x double> @fsub_const_op0_splat_v4f64(double %x) {
; SSE-LABEL: fsub_const_op0_splat_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    subpd %xmm0, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_const_op0_splat_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double 8.0, i32 0
  %vy = insertelement <4 x double> undef, double %x, i32 0
  %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
  %r = fsub <4 x double> %splatx, %splaty
  ret <4 x double> %r
}
509
+
510
; splat(x) * constant-splat with fast-math: expect splat + constant-pool mulps.
; NOTE(review): %vy is unused by the body — kept to preserve the tested signature.
define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: fmul_splat_const_op1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    mulps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_splat_const_op1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0>
  ret <4 x float> %r
}
526
+
527
; splat(constant vector) / splat(y) with fast-math: fdiv becomes a reciprocal
; estimate + Newton-Raphson step, then a multiply by the splatted constant.
define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) {
; SSE-LABEL: fdiv_splat_const_op0_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    rcpps %xmm0, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT:    subps %xmm0, %xmm1
; SSE-NEXT:    mulps %xmm2, %xmm1
; SSE-NEXT:    addps %xmm2, %xmm1
; SSE-NEXT:    mulps {{.*}}(%rip), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_splat_const_op0_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vrcpps %ymm0, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT:    vsubps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %splatx, %splaty
  ret <8 x float> %r
}
558
+
559
; splat(x) / splat(<0.0, ...>) with fast-math: the divisor splats lane 0 (0.0),
; so codegen emits rcpps of a zeroed register (inf) — exercises the degenerate case.
define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
; SSE-LABEL: fdiv_const_op1_splat_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    rcpps %xmm1, %xmm1
; SSE-NEXT:    addps %xmm1, %xmm1
; SSE-NEXT:    mulps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_const_op1_splat_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vrcpps %ymm1, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
  %splaty = shufflevector <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %splatx, %splaty
  ret <8 x float> %r
}
584
+
469
585
define <2 x double > @splat0_fadd_v2f64 (<2 x double > %vx , <2 x double > %vy ) {
470
586
; SSE-LABEL: splat0_fadd_v2f64:
471
587
; SSE: # %bb.0:
@@ -551,3 +667,118 @@ define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) {
551
667
%r = shufflevector <8 x float > %b , <8 x float > undef , <8 x i32 > zeroinitializer
552
668
ret <8 x float > %r
553
669
}
670
+
671
; splat0(x + constant): only lane 0 of the fadd matters, so the add can be
; done as a scalar-in-vector op before the splat.
define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) {
; SSE-LABEL: splat0_fadd_const_op1_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    addpd %xmm0, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fadd_const_op1_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %b = fadd <2 x double> %vx, <double 42.0, double 12.0>
  %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %r
}
690
+
691
; splat0(constant - x): only the lane-0 constant (-42.0) is live after the splat.
define <4 x double> @splat0_fsub_const_op0_v4f64(double %x) {
; SSE-LABEL: splat0_fsub_const_op0_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    subpd %xmm0, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fsub_const_op0_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double %x, i32 0
  %b = fsub <4 x double> <double -42.0, double 42.0, double 0.0, double 1.0>, %vx
  %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %r
}
712
+
713
; splat0(x * constant) with fast-math: scalar multiply by lane-0 constant (6.0),
; then splat the result.
define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
; SSE-LABEL: splat0_fmul_const_op1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    mulps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fmul_const_op1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
  %b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
  %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %r
}
732
+
733
; splat0(x / constant) with fast-math: lane-0 divisor is 1.0, so SSE folds the
; divide away entirely; AVX still emits the reciprocal sequence.
define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) {
; SSE-LABEL: splat0_fdiv_const_op1_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vrcpps %ymm1, %ymm1
; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT:    vsubps %xmm1, %xmm2, %xmm2
; AVX-NEXT:    vmulps %xmm2, %xmm1, %xmm2
; AVX-NEXT:    vaddps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}
756
+
757
; splat0(constant / x) with fast-math: reciprocal-estimate + Newton-Raphson on
; lane 0, multiply by the lane-0 constant (1.0, folded), then splat.
define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
; SSE-LABEL: splat0_fdiv_const_op0_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    rcpps %xmm0, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT:    subps %xmm0, %xmm1
; SSE-NEXT:    mulps %xmm2, %xmm1
; SSE-NEXT:    addps %xmm2, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fdiv_const_op0_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vrcpps %ymm0, %ymm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}
0 commit comments