@@ -840,44 +840,18 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
 define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
 ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
 ; CHECK-SSE:       # %bb.0:
-; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2,2]
-; CHECK-SSE-NEXT:    movdqa %xmm3, %xmm1
-; CHECK-SSE-NEXT:    psllq %xmm2, %xmm1
-; CHECK-SSE-NEXT:    psllq %xmm0, %xmm3
-; CHECK-SSE-NEXT:    movq %xmm3, %rax
-; CHECK-SSE-NEXT:    testq %rax, %rax
-; CHECK-SSE-NEXT:    js .LBB12_1
-; CHECK-SSE-NEXT:  # %bb.2:
-; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
-; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
-; CHECK-SSE-NEXT:    jmp .LBB12_3
-; CHECK-SSE-NEXT:  .LBB12_1:
-; CHECK-SSE-NEXT:    movq %rax, %rcx
-; CHECK-SSE-NEXT:    shrq %rcx
-; CHECK-SSE-NEXT:    andl $1, %eax
-; CHECK-SSE-NEXT:    orq %rcx, %rax
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2,2]
+; CHECK-SSE-NEXT:    movdqa %xmm2, %xmm3
+; CHECK-SSE-NEXT:    psllq %xmm1, %xmm3
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT:    movq %xmm2, %rax
 ; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
-; CHECK-SSE-NEXT:    addss %xmm0, %xmm0
-; CHECK-SSE-NEXT:  .LBB12_3:
-; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
 ; CHECK-SSE-NEXT:    movq %xmm1, %rax
-; CHECK-SSE-NEXT:    testq %rax, %rax
-; CHECK-SSE-NEXT:    js .LBB12_4
-; CHECK-SSE-NEXT:  # %bb.5:
-; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
-; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
-; CHECK-SSE-NEXT:    jmp .LBB12_6
-; CHECK-SSE-NEXT:  .LBB12_4:
-; CHECK-SSE-NEXT:    movq %rax, %rcx
-; CHECK-SSE-NEXT:    shrq %rcx
-; CHECK-SSE-NEXT:    andl $1, %eax
-; CHECK-SSE-NEXT:    orq %rcx, %rax
 ; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
 ; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
-; CHECK-SSE-NEXT:    addss %xmm1, %xmm1
-; CHECK-SSE-NEXT:  .LBB12_6:
 ; CHECK-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-SSE-NEXT:    retq
@@ -886,18 +860,11 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-AVX2:       # %bb.0:
 ; CHECK-AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [2,2]
 ; CHECK-AVX2-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT:    vpextrq $1, %xmm1, %rax
-; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
-; CHECK-AVX2-NEXT:    vmovq %xmm1, %rax
-; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
-; CHECK-AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
-; CHECK-AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
-; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; CHECK-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
-; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; CHECK-AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; CHECK-AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
+; CHECK-AVX2-NEXT:    vmovq %xmm0, %rax
+; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; CHECK-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
 ; CHECK-AVX2-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT:    retq
@@ -907,9 +874,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-NO-FASTFMA-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [2,2]
 ; CHECK-NO-FASTFMA-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
 ; CHECK-NO-FASTFMA-NEXT:    vpextrq $1, %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
+; CHECK-NO-FASTFMA-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
 ; CHECK-NO-FASTFMA-NEXT:    vmovq %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
+; CHECK-NO-FASTFMA-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
 ; CHECK-NO-FASTFMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; CHECK-NO-FASTFMA-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
 ; CHECK-NO-FASTFMA-NEXT:    vmulps %xmm1, %xmm0, %xmm0
@@ -919,7 +886,7 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-FMA:       # %bb.0:
 ; CHECK-FMA-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
 ; CHECK-FMA-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT:    vcvtuqq2ps %xmm0, %xmm0
+; CHECK-FMA-NEXT:    vcvtqq2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; CHECK-FMA-NEXT:    retq
   %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
@@ -986,7 +953,7 @@ define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float
 ; CHECK-FMA:       # %bb.0:
 ; CHECK-FMA-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
 ; CHECK-FMA-NEXT:    vpsllvd %xmm0, %xmm2, %xmm0
-; CHECK-FMA-NEXT:    vcvtudq2ps %xmm0, %xmm0
+; CHECK-FMA-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
 ; CHECK-FMA-NEXT:    retq
   %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt