@@ -2563,15 +2563,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512F-NEXT:    vbroadcastss %xmm4, %ymm12
 ; AVX512F-NEXT:    vmulps %ymm0, %ymm12, %ymm12
 ; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm13 = xmm4[1,1,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512F-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512F-NEXT:    vmulps %ymm13, %ymm11, %ymm13
 ; AVX512F-NEXT:    vaddps %ymm13, %ymm12, %ymm12
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm13 = xmm4[2,2,2,2]
-; AVX512F-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512F-NEXT:    vshufpd {{.*#+}} xmm13 = xmm4[1,0]
+; AVX512F-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512F-NEXT:    vmulps %ymm1, %ymm13, %ymm13
 ; AVX512F-NEXT:    vaddps %ymm13, %ymm12, %ymm12
 ; AVX512F-NEXT:    vshufps {{.*#+}} xmm13 = xmm4[3,3,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512F-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512F-NEXT:    vmulps %ymm13, %ymm10, %ymm13
 ; AVX512F-NEXT:    vaddps %ymm13, %ymm12, %ymm12
 ; AVX512F-NEXT:    vextractf128 $1, %ymm4, %xmm13
@@ -2627,15 +2627,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512F-NEXT:    vbroadcastss %xmm5, %ymm13
 ; AVX512F-NEXT:    vmulps %ymm0, %ymm13, %ymm13
 ; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm14 = xmm5[1,1,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm14, %ymm11, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm13, %ymm13
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm14 = xmm5[2,2,2,2]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vshufpd {{.*#+}} xmm14 = xmm5[1,0]
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm1, %ymm14, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm13, %ymm13
 ; AVX512F-NEXT:    vshufps {{.*#+}} xmm14 = xmm5[3,3,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm14, %ymm10, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm13, %ymm13
 ; AVX512F-NEXT:    vextractf128 $1, %ymm5, %xmm14
@@ -2689,15 +2689,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512F-NEXT:    vbroadcastss %xmm6, %ymm12
 ; AVX512F-NEXT:    vmulps %ymm0, %ymm12, %ymm12
 ; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm14 = xmm6[1,1,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm14, %ymm11, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm12, %ymm12
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm14 = xmm6[2,2,2,2]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vshufpd {{.*#+}} xmm14 = xmm6[1,0]
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm1, %ymm14, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm12, %ymm12
 ; AVX512F-NEXT:    vshufps {{.*#+}} xmm14 = xmm6[3,3,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512F-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512F-NEXT:    vmulps %ymm14, %ymm10, %ymm14
 ; AVX512F-NEXT:    vaddps %ymm14, %ymm12, %ymm12
 ; AVX512F-NEXT:    vextractf128 $1, %ymm6, %xmm14
@@ -2753,15 +2753,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512F-NEXT:    vbroadcastss %xmm7, %ymm12
 ; AVX512F-NEXT:    vmulps %ymm0, %ymm12, %ymm12
 ; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm15 = xmm7[1,1,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512F-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512F-NEXT:    vmulps %ymm15, %ymm11, %ymm15
 ; AVX512F-NEXT:    vaddps %ymm15, %ymm12, %ymm12
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm15 = xmm7[2,2,2,2]
-; AVX512F-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512F-NEXT:    vshufpd {{.*#+}} xmm15 = xmm7[1,0]
+; AVX512F-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512F-NEXT:    vmulps %ymm1, %ymm15, %ymm15
 ; AVX512F-NEXT:    vaddps %ymm15, %ymm12, %ymm12
 ; AVX512F-NEXT:    vshufps {{.*#+}} xmm15 = xmm7[3,3,3,3]
-; AVX512F-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512F-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512F-NEXT:    vmulps %ymm15, %ymm10, %ymm15
 ; AVX512F-NEXT:    vaddps %ymm15, %ymm12, %ymm12
 ; AVX512F-NEXT:    vextractf128 $1, %ymm7, %xmm15
@@ -2828,15 +2828,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512VL-NEXT:    vbroadcastss %xmm4, %ymm12
 ; AVX512VL-NEXT:    vmulps %ymm0, %ymm12, %ymm12
 ; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm13 = xmm4[1,1,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512VL-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512VL-NEXT:    vmulps %ymm13, %ymm11, %ymm13
 ; AVX512VL-NEXT:    vaddps %ymm13, %ymm12, %ymm12
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm13 = xmm4[2,2,2,2]
-; AVX512VL-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm13 = xmm4[1,0]
+; AVX512VL-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512VL-NEXT:    vmulps %ymm1, %ymm13, %ymm13
 ; AVX512VL-NEXT:    vaddps %ymm13, %ymm12, %ymm12
 ; AVX512VL-NEXT:    vshufps {{.*#+}} xmm13 = xmm4[3,3,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm13, %ymm13
+; AVX512VL-NEXT:    vbroadcastss %xmm13, %ymm13
 ; AVX512VL-NEXT:    vmulps %ymm13, %ymm10, %ymm13
 ; AVX512VL-NEXT:    vaddps %ymm13, %ymm12, %ymm12
 ; AVX512VL-NEXT:    vextractf128 $1, %ymm4, %xmm13
@@ -2890,15 +2890,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512VL-NEXT:    vbroadcastss %xmm5, %ymm13
 ; AVX512VL-NEXT:    vmulps %ymm0, %ymm13, %ymm13
 ; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm14 = xmm5[1,1,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512VL-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512VL-NEXT:    vmulps %ymm14, %ymm11, %ymm14
 ; AVX512VL-NEXT:    vaddps %ymm14, %ymm13, %ymm13
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm14 = xmm5[2,2,2,2]
-; AVX512VL-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm14 = xmm5[1,0]
+; AVX512VL-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512VL-NEXT:    vmulps %ymm1, %ymm14, %ymm14
 ; AVX512VL-NEXT:    vaddps %ymm14, %ymm13, %ymm13
 ; AVX512VL-NEXT:    vshufps {{.*#+}} xmm14 = xmm5[3,3,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm14, %ymm14
+; AVX512VL-NEXT:    vbroadcastss %xmm14, %ymm14
 ; AVX512VL-NEXT:    vmulps %ymm14, %ymm10, %ymm14
 ; AVX512VL-NEXT:    vaddps %ymm14, %ymm13, %ymm13
 ; AVX512VL-NEXT:    vextractf128 $1, %ymm5, %xmm14
@@ -2952,15 +2952,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512VL-NEXT:    vbroadcastss %xmm6, %ymm14
 ; AVX512VL-NEXT:    vmulps %ymm0, %ymm14, %ymm14
 ; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm15 = xmm6[1,1,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512VL-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512VL-NEXT:    vmulps %ymm15, %ymm11, %ymm15
 ; AVX512VL-NEXT:    vaddps %ymm15, %ymm14, %ymm14
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm15 = xmm6[2,2,2,2]
-; AVX512VL-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm15 = xmm6[1,0]
+; AVX512VL-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512VL-NEXT:    vmulps %ymm1, %ymm15, %ymm15
 ; AVX512VL-NEXT:    vaddps %ymm15, %ymm14, %ymm14
 ; AVX512VL-NEXT:    vshufps {{.*#+}} xmm15 = xmm6[3,3,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm15, %ymm15
+; AVX512VL-NEXT:    vbroadcastss %xmm15, %ymm15
 ; AVX512VL-NEXT:    vmulps %ymm15, %ymm10, %ymm15
 ; AVX512VL-NEXT:    vaddps %ymm15, %ymm14, %ymm14
 ; AVX512VL-NEXT:    vextractf128 $1, %ymm6, %xmm15
@@ -3014,15 +3014,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
 ; AVX512VL-NEXT:    vbroadcastss %xmm7, %ymm15
 ; AVX512VL-NEXT:    vmulps %ymm0, %ymm15, %ymm15
 ; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm16 = xmm7[1,1,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm16, %ymm16
+; AVX512VL-NEXT:    vbroadcastss %xmm16, %ymm16
 ; AVX512VL-NEXT:    vmulps %ymm16, %ymm11, %ymm16
 ; AVX512VL-NEXT:    vaddps %ymm16, %ymm15, %ymm15
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm16 = xmm7[2,2,2,2]
-; AVX512VL-NEXT:    vbroadcastsd %xmm16, %ymm16
+; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm16 = xmm7[1,0]
+; AVX512VL-NEXT:    vbroadcastss %xmm16, %ymm16
 ; AVX512VL-NEXT:    vmulps %ymm16, %ymm1, %ymm16
 ; AVX512VL-NEXT:    vaddps %ymm16, %ymm15, %ymm15
 ; AVX512VL-NEXT:    vshufps {{.*#+}} xmm16 = xmm7[3,3,3,3]
-; AVX512VL-NEXT:    vbroadcastsd %xmm16, %ymm16
+; AVX512VL-NEXT:    vbroadcastss %xmm16, %ymm16
 ; AVX512VL-NEXT:    vmulps %ymm16, %ymm10, %ymm16
 ; AVX512VL-NEXT:    vaddps %ymm16, %ymm15, %ymm15
 ; AVX512VL-NEXT:    vextractf32x4 $1, %ymm7, %xmm16