@@ -4013,96 +4013,13 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f16_sign_v2f64(<2 x half> %mag, <2
4013
4013
; SI-LABEL: v_copysign_out_v2f16_mag_v2f16_sign_v2f64:
4014
4014
; SI: ; %bb.0:
4015
4015
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4016
- ; SI-NEXT: v_and_b32_e32 v7, 0x1ff, v5
4017
- ; SI-NEXT: v_or_b32_e32 v4, v7, v4
4018
- ; SI-NEXT: v_lshrrev_b32_e32 v6, 8, v5
4019
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
4020
- ; SI-NEXT: v_and_b32_e32 v6, 0xffe, v6
4021
- ; SI-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
4022
- ; SI-NEXT: v_bfe_u32 v7, v5, 20, 11
4023
- ; SI-NEXT: s_movk_i32 s4, 0x3f1
4024
- ; SI-NEXT: v_or_b32_e32 v4, v6, v4
4025
- ; SI-NEXT: v_sub_i32_e32 v8, vcc, s4, v7
4026
- ; SI-NEXT: v_or_b32_e32 v6, 0x1000, v4
4027
- ; SI-NEXT: v_med3_i32 v8, v8, 0, 13
4028
- ; SI-NEXT: v_lshrrev_b32_e32 v9, v8, v6
4029
- ; SI-NEXT: v_lshlrev_b32_e32 v8, v8, v9
4030
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, v8, v6
4031
- ; SI-NEXT: s_movk_i32 s5, 0xfc10
4032
- ; SI-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
4033
- ; SI-NEXT: v_add_i32_e32 v7, vcc, s5, v7
4034
- ; SI-NEXT: v_lshlrev_b32_e32 v8, 12, v7
4035
- ; SI-NEXT: v_or_b32_e32 v6, v9, v6
4036
- ; SI-NEXT: v_or_b32_e32 v8, v4, v8
4037
- ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 1, v7
4038
- ; SI-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
4039
- ; SI-NEXT: v_and_b32_e32 v8, 7, v6
4040
- ; SI-NEXT: v_cmp_lt_i32_e32 vcc, 5, v8
4041
- ; SI-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
4042
- ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
4043
- ; SI-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
4044
- ; SI-NEXT: v_or_b32_e32 v8, v8, v9
4045
- ; SI-NEXT: v_lshrrev_b32_e32 v6, 2, v6
4046
- ; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v8
4047
- ; SI-NEXT: v_mov_b32_e32 v8, 0x7c00
4048
- ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 31, v7
4049
- ; SI-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
4050
- ; SI-NEXT: v_mov_b32_e32 v9, 0x7e00
4051
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
4052
- ; SI-NEXT: s_movk_i32 s6, 0x40f
4053
- ; SI-NEXT: v_cndmask_b32_e32 v4, v8, v9, vcc
4054
- ; SI-NEXT: v_cmp_eq_u32_e32 vcc, s6, v7
4055
- ; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
4056
- ; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
4057
- ; SI-NEXT: v_and_b32_e32 v6, 0x1ff, v3
4058
- ; SI-NEXT: v_and_b32_e32 v5, 0x8000, v5
4059
- ; SI-NEXT: v_or_b32_e32 v2, v6, v2
4060
- ; SI-NEXT: v_or_b32_e32 v4, v5, v4
4061
- ; SI-NEXT: v_lshrrev_b32_e32 v5, 8, v3
4062
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
4063
- ; SI-NEXT: v_and_b32_e32 v5, 0xffe, v5
4064
- ; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
4065
- ; SI-NEXT: v_bfe_u32 v6, v3, 20, 11
4066
- ; SI-NEXT: v_or_b32_e32 v2, v5, v2
4067
- ; SI-NEXT: v_sub_i32_e32 v7, vcc, s4, v6
4068
- ; SI-NEXT: v_or_b32_e32 v5, 0x1000, v2
4069
- ; SI-NEXT: v_med3_i32 v7, v7, 0, 13
4070
- ; SI-NEXT: v_lshrrev_b32_e32 v10, v7, v5
4071
- ; SI-NEXT: v_lshlrev_b32_e32 v7, v7, v10
4072
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, v7, v5
4073
- ; SI-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
4074
- ; SI-NEXT: v_add_i32_e32 v6, vcc, s5, v6
4075
- ; SI-NEXT: v_lshlrev_b32_e32 v7, 12, v6
4076
- ; SI-NEXT: v_or_b32_e32 v5, v10, v5
4077
- ; SI-NEXT: v_or_b32_e32 v7, v2, v7
4078
- ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 1, v6
4079
- ; SI-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
4080
- ; SI-NEXT: v_and_b32_e32 v7, 7, v5
4081
- ; SI-NEXT: v_cmp_lt_i32_e32 vcc, 5, v7
4082
- ; SI-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
4083
- ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7
4084
- ; SI-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
4085
- ; SI-NEXT: v_or_b32_e32 v7, v7, v10
4086
- ; SI-NEXT: v_lshrrev_b32_e32 v5, 2, v5
4087
- ; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v7
4088
- ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 31, v6
4089
- ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
4090
4016
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
4091
- ; SI-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
4092
- ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
4093
- ; SI-NEXT: v_cndmask_b32_e32 v2, v8, v9, vcc
4094
- ; SI-NEXT: v_cmp_eq_u32_e32 vcc, s6, v6
4095
- ; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
4096
- ; SI-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
4097
- ; SI-NEXT: v_and_b32_e32 v3, 0x8000, v3
4098
- ; SI-NEXT: v_or_b32_e32 v2, v3, v2
4099
- ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
4100
- ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
4101
- ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
4102
- ; SI-NEXT: v_cvt_f32_f16_e32 v3, v4
4017
+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
4103
4018
; SI-NEXT: s_brev_b32 s4, -2
4104
- ; SI-NEXT: v_bfi_b32 v0, s4, v0, v2
4105
- ; SI-NEXT: v_bfi_b32 v1, s4, v1, v3
4019
+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
4020
+ ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
4021
+ ; SI-NEXT: v_bfi_b32 v0, s4, v0, v3
4022
+ ; SI-NEXT: v_bfi_b32 v1, s4, v1, v5
4106
4023
; SI-NEXT: s_setpc_b64 s[30:31]
4107
4024
;
4108
4025
; VI-LABEL: v_copysign_out_v2f16_mag_v2f16_sign_v2f64:
@@ -4900,99 +4817,16 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f16_sign_v2f32(<2 x half> inreg
4900
4817
define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f16_sign_v2f64 (<2 x half > inreg %mag , <2 x double > inreg %sign ) {
4901
4818
; SI-LABEL: s_copysign_out_v2f16_mag_v2f16_sign_v2f64:
4902
4819
; SI: ; %bb.0:
4903
- ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
4904
- ; SI-NEXT: s_lshr_b32 s0, s3, 8
4905
- ; SI-NEXT: s_and_b32 s6, s0, 0xffe
4906
- ; SI-NEXT: s_and_b32 s0, s3, 0x1ff
4907
- ; SI-NEXT: s_or_b32 s0, s0, s2
4908
- ; SI-NEXT: s_cmp_lg_u32 s0, 0
4909
4820
; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
4910
- ; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
4911
- ; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
4912
- ; SI-NEXT: v_readfirstlane_b32 s0, v2
4913
- ; SI-NEXT: s_bfe_u32 s2, s3, 0xb0014
4914
- ; SI-NEXT: s_or_b32 s0, s6, s0
4915
- ; SI-NEXT: s_sub_i32 s6, 0x3f1, s2
4916
- ; SI-NEXT: v_med3_i32 v2, s6, 0, 13
4917
- ; SI-NEXT: s_or_b32 s1, s0, 0x1000
4918
- ; SI-NEXT: v_readfirstlane_b32 s6, v2
4919
- ; SI-NEXT: s_lshr_b32 s7, s1, s6
4920
- ; SI-NEXT: s_lshl_b32 s6, s7, s6
4921
- ; SI-NEXT: s_cmp_lg_u32 s6, s1
4922
- ; SI-NEXT: s_cselect_b32 s1, 1, 0
4923
- ; SI-NEXT: s_addk_i32 s2, 0xfc10
4924
- ; SI-NEXT: s_lshl_b32 s6, s2, 12
4925
- ; SI-NEXT: s_or_b32 s1, s7, s1
4926
- ; SI-NEXT: s_or_b32 s6, s0, s6
4927
- ; SI-NEXT: s_cmp_lt_i32 s2, 1
4928
- ; SI-NEXT: s_cselect_b32 s1, s1, s6
4929
- ; SI-NEXT: s_and_b32 s6, s1, 7
4930
- ; SI-NEXT: s_cmp_gt_i32 s6, 5
4931
- ; SI-NEXT: s_cselect_b32 s7, 1, 0
4932
- ; SI-NEXT: s_cmp_eq_u32 s6, 3
4933
- ; SI-NEXT: s_cselect_b32 s6, 1, 0
4934
- ; SI-NEXT: s_or_b32 s6, s6, s7
4935
- ; SI-NEXT: s_lshr_b32 s1, s1, 2
4936
- ; SI-NEXT: s_add_i32 s1, s1, s6
4937
- ; SI-NEXT: s_cmp_lt_i32 s2, 31
4938
- ; SI-NEXT: s_cselect_b32 s1, s1, 0x7c00
4939
- ; SI-NEXT: s_cmp_lg_u32 s0, 0
4940
- ; SI-NEXT: s_movk_i32 s6, 0x7e00
4941
- ; SI-NEXT: s_cselect_b32 s0, s6, 0x7c00
4942
- ; SI-NEXT: s_cmpk_eq_i32 s2, 0x40f
4943
- ; SI-NEXT: s_cselect_b32 s0, s0, s1
4944
- ; SI-NEXT: s_lshr_b32 s1, s3, 16
4945
- ; SI-NEXT: s_and_b32 s1, s1, 0x8000
4946
- ; SI-NEXT: s_or_b32 s2, s1, s0
4947
- ; SI-NEXT: s_lshr_b32 s0, s5, 8
4948
- ; SI-NEXT: s_and_b32 s3, s0, 0xffe
4949
- ; SI-NEXT: s_and_b32 s0, s5, 0x1ff
4950
- ; SI-NEXT: s_or_b32 s0, s0, s4
4951
- ; SI-NEXT: s_cmp_lg_u32 s0, 0
4952
- ; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
4953
- ; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
4954
- ; SI-NEXT: v_readfirstlane_b32 s0, v2
4955
- ; SI-NEXT: s_or_b32 s0, s3, s0
4956
- ; SI-NEXT: s_bfe_u32 s3, s5, 0xb0014
4957
- ; SI-NEXT: s_sub_i32 s4, 0x3f1, s3
4958
- ; SI-NEXT: v_med3_i32 v2, s4, 0, 13
4959
- ; SI-NEXT: s_or_b32 s1, s0, 0x1000
4960
- ; SI-NEXT: v_readfirstlane_b32 s4, v2
4961
- ; SI-NEXT: s_lshr_b32 s7, s1, s4
4962
- ; SI-NEXT: s_lshl_b32 s4, s7, s4
4963
- ; SI-NEXT: s_cmp_lg_u32 s4, s1
4964
- ; SI-NEXT: s_cselect_b32 s1, 1, 0
4965
- ; SI-NEXT: s_addk_i32 s3, 0xfc10
4966
- ; SI-NEXT: s_lshl_b32 s4, s3, 12
4967
- ; SI-NEXT: s_or_b32 s1, s7, s1
4968
- ; SI-NEXT: s_or_b32 s4, s0, s4
4969
- ; SI-NEXT: s_cmp_lt_i32 s3, 1
4970
- ; SI-NEXT: s_cselect_b32 s1, s1, s4
4971
- ; SI-NEXT: s_and_b32 s4, s1, 7
4972
- ; SI-NEXT: s_cmp_gt_i32 s4, 5
4973
- ; SI-NEXT: s_cselect_b32 s7, 1, 0
4974
- ; SI-NEXT: s_cmp_eq_u32 s4, 3
4975
- ; SI-NEXT: s_cselect_b32 s4, 1, 0
4976
- ; SI-NEXT: s_or_b32 s4, s4, s7
4977
- ; SI-NEXT: s_lshr_b32 s1, s1, 2
4978
- ; SI-NEXT: s_add_i32 s1, s1, s4
4979
- ; SI-NEXT: s_cmp_lt_i32 s3, 31
4980
- ; SI-NEXT: s_cselect_b32 s1, s1, 0x7c00
4981
- ; SI-NEXT: s_cmp_lg_u32 s0, 0
4982
- ; SI-NEXT: s_cselect_b32 s0, s6, 0x7c00
4983
- ; SI-NEXT: s_cmpk_eq_i32 s3, 0x40f
4984
- ; SI-NEXT: s_cselect_b32 s0, s0, s1
4985
- ; SI-NEXT: s_lshr_b32 s1, s5, 16
4986
- ; SI-NEXT: s_and_b32 s1, s1, 0x8000
4987
- ; SI-NEXT: s_or_b32 s0, s1, s0
4821
+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
4822
+ ; SI-NEXT: s_brev_b32 s0, -2
4823
+ ; SI-NEXT: v_mov_b32_e32 v2, s5
4988
4824
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
4989
- ; SI-NEXT: v_cvt_f32_f16_e32 v2, s0
4990
4825
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
4991
- ; SI-NEXT: v_cvt_f32_f16_e32 v3, s2
4992
- ; SI-NEXT: s_brev_b32 s0, -2
4993
4826
; SI-NEXT: v_bfi_b32 v0, s0, v0, v2
4827
+ ; SI-NEXT: v_mov_b32_e32 v2, s3
4994
4828
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
4995
- ; SI-NEXT: v_bfi_b32 v1, s0, v1, v3
4829
+ ; SI-NEXT: v_bfi_b32 v1, s0, v1, v2
4996
4830
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
4997
4831
; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4998
4832
; SI-NEXT: v_or_b32_e32 v0, v1, v0
0 commit comments