@@ -5783,23 +5783,20 @@ define float @v_log_f32_0() {
5783
5783
; SI-GISEL-LABEL: v_log_f32_0:
5784
5784
; SI-GISEL: ; %bb.0:
5785
5785
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5786
- ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5787
- ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5788
- ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5789
- ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5790
- ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5791
- ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5786
+ ; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
5792
5787
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5793
- ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5794
- ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5795
- ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5796
- ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5797
- ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5798
- ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5799
- ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5800
- ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5801
- ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5802
- ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5788
+ ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5789
+ ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5790
+ ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5791
+ ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5792
+ ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5793
+ ; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5794
+ ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5795
+ ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5796
+ ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5797
+ ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5798
+ ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5799
+ ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5803
5800
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5804
5801
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5805
5802
;
@@ -5825,26 +5822,23 @@ define float @v_log_f32_0() {
5825
5822
; VI-GISEL-LABEL: v_log_f32_0:
5826
5823
; VI-GISEL: ; %bb.0:
5827
5824
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5828
- ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5829
- ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5830
- ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5831
- ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5832
- ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5833
- ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5834
- ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5835
- ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5836
- ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
5825
+ ; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
5826
+ ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5827
+ ; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
5828
+ ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
5837
5829
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
5830
+ ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
5831
+ ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5832
+ ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
5838
5833
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
5839
5834
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
5840
5835
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5841
- ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5842
- ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5843
- ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5844
- ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5845
- ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5846
- ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5847
- ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5836
+ ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5837
+ ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5838
+ ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5839
+ ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5840
+ ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5841
+ ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5848
5842
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5849
5843
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5850
5844
;
@@ -5867,23 +5861,20 @@ define float @v_log_f32_0() {
5867
5861
; GFX900-GISEL-LABEL: v_log_f32_0:
5868
5862
; GFX900-GISEL: ; %bb.0:
5869
5863
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5870
- ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5871
- ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5872
- ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5873
- ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
5874
- ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5875
- ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5864
+ ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
5876
5865
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5877
- ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5878
- ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5879
- ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5880
- ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5881
- ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5882
- ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5883
- ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5884
- ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5885
- ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5886
- ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5866
+ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5867
+ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5868
+ ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5869
+ ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5870
+ ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5871
+ ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5872
+ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5873
+ ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5874
+ ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5875
+ ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5876
+ ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5877
+ ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5887
5878
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5888
5879
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5889
5880
;
@@ -5907,21 +5898,18 @@ define float @v_log_f32_0() {
5907
5898
; GFX1100-GISEL-LABEL: v_log_f32_0:
5908
5899
; GFX1100-GISEL: ; %bb.0:
5909
5900
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5910
- ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
5911
- ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
5912
- ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5913
- ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
5914
- ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5901
+ ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
5902
+ ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
5915
5903
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5916
5904
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5917
- ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0 , 0x7f800000, |v0|
5905
+ ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo , 0x7f800000, |v0|
5918
5906
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5919
5907
; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5920
5908
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5921
5909
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5922
5910
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5923
- ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
5924
- ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5911
+ ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
5912
+ ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
5925
5913
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
5926
5914
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5927
5915
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
0 commit comments