@@ -1913,124 +1913,6 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1913
1913
; GFX9-NEXT: global_store_short v0, v1, s[0:1]
1914
1914
; GFX9-NEXT: s_endpgm
1915
1915
;
1916
- <<<<<<< HEAD
1917
- ; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f64_sign_f16:
1918
- ; GFX11-TRUE16: ; %bb.0:
1919
- ; GFX11-TRUE16-NEXT: s_clause 0x1
1920
- ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1921
- ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34
1922
- ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1923
- ; GFX11-TRUE16-NEXT: s_and_b32 s5, s3, 0x1ff
1924
- ; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 8
1925
- ; GFX11-TRUE16-NEXT: s_or_b32 s2, s5, s2
1926
- ; GFX11-TRUE16-NEXT: s_and_b32 s5, s6, 0xffe
1927
- ; GFX11-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
1928
- ; GFX11-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
1929
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1930
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
1931
- ; GFX11-TRUE16-NEXT: s_bfe_u32 s2, s3, 0xb0014
1932
- ; GFX11-TRUE16-NEXT: s_sub_i32 s3, 0x3f1, s2
1933
- ; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0xfc10
1934
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1935
- ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
1936
- ; GFX11-TRUE16-NEXT: v_med3_i32 v1, s3, 0, 13
1937
- ; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s2, 12
1938
- ; GFX11-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
1939
- ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x1000, v0
1940
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1941
- ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, v1, v2
1942
- ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, v1, v3
1943
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1944
- ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2
1945
- ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, s3, v0
1946
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1947
- ; GFX11-TRUE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
1948
- ; GFX11-TRUE16-NEXT: s_cmp_lt_i32 s2, 31
1949
- ; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v3, v1
1950
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1951
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
1952
- ; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 7, v1
1953
- ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 2, v1
1954
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
1955
- ; GFX11-TRUE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v2
1956
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1957
- ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v2
1958
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1959
- ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1960
- ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v3
1961
- ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0x7e00
1962
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1963
- ; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, 0x7c00, v3 :: v_dual_add_nc_u32 v1, v1, v2
1964
- ; GFX11-TRUE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
1965
- ; GFX11-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
1966
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, 0x7c00, v1, vcc_lo
1967
- ; GFX11-TRUE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
1968
- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1969
- ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
1970
- ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s4
1971
- ; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
1972
- ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
1973
- ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
1974
- ; GFX11-TRUE16-NEXT: s_endpgm
1975
- ;
1976
- ; GFX11-FAKE16-LABEL: s_copysign_out_f16_mag_f64_sign_f16:
1977
- ; GFX11-FAKE16: ; %bb.0:
1978
- ; GFX11-FAKE16-NEXT: s_clause 0x1
1979
- ; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1980
- ; GFX11-FAKE16-NEXT: s_load_b32 s4, s[4:5], 0x34
1981
- ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1982
- ; GFX11-FAKE16-NEXT: s_and_b32 s5, s3, 0x1ff
1983
- ; GFX11-FAKE16-NEXT: s_lshr_b32 s6, s3, 8
1984
- ; GFX11-FAKE16-NEXT: s_or_b32 s2, s5, s2
1985
- ; GFX11-FAKE16-NEXT: s_and_b32 s5, s6, 0xffe
1986
- ; GFX11-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
1987
- ; GFX11-FAKE16-NEXT: s_cselect_b32 s2, -1, 0
1988
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1989
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
1990
- ; GFX11-FAKE16-NEXT: s_bfe_u32 s2, s3, 0xb0014
1991
- ; GFX11-FAKE16-NEXT: s_sub_i32 s3, 0x3f1, s2
1992
- ; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0xfc10
1993
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1994
- ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
1995
- ; GFX11-FAKE16-NEXT: v_med3_i32 v1, s3, 0, 13
1996
- ; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s2, 12
1997
- ; GFX11-FAKE16-NEXT: s_cmp_lt_i32 s2, 1
1998
- ; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x1000, v0
1999
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2000
- ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, v1, v2
2001
- ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, v1, v3
2002
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
2003
- ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2
2004
- ; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, s3, v0
2005
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
2006
- ; GFX11-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
2007
- ; GFX11-FAKE16-NEXT: s_cmp_lt_i32 s2, 31
2008
- ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v3, v1
2009
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2010
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
2011
- ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 7, v1
2012
- ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 2, v1
2013
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
2014
- ; GFX11-FAKE16-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v2
2015
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
2016
- ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v2
2017
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
2018
- ; GFX11-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
2019
- ; GFX11-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
2020
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2021
- ; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v3
2022
- ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, 0x7e00 :: v_dual_add_nc_u32 v1, v1, v2
2023
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2024
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7c00, v1, vcc_lo
2025
- ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
2026
- ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v2, vcc_lo
2027
- ; GFX11-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
2028
- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2029
- ; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_mov_b32 v1, 0
2030
- ; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, s4
2031
- ; GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
2032
- ; GFX11-FAKE16-NEXT: s_endpgm
2033
- =======
2034
1916
; GFX11-LABEL: s_copysign_out_f16_mag_f64_sign_f16:
2035
1917
; GFX11: ; %bb.0:
2036
1918
; GFX11-NEXT: s_clause 0x1
@@ -2047,49 +1929,47 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
2047
1929
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
2048
1930
; GFX11-NEXT: s_bfe_u32 s2, s3, 0xb0014
2049
1931
; GFX11-NEXT: s_sub_i32 s3, 0x3f1, s2
2050
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2051
- ; GFX11-NEXT: v_med3_i32 v1, s3, 0, 13
2052
- ; GFX11-NEXT: v_readfirstlane_b32 s3, v0
2053
- ; GFX11-NEXT: v_mov_b32_e32 v0, s4
2054
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
2055
- ; GFX11-NEXT: v_readfirstlane_b32 s6, v1
2056
- ; GFX11-NEXT: v_mov_b32_e32 v1, 0
2057
- ; GFX11-NEXT: s_or_b32 s3, s5, s3
2058
- ; GFX11-NEXT: s_or_b32 s5, s3, 0x1000
2059
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2060
- ; GFX11-NEXT: s_lshr_b32 s7, s5, s6
2061
- ; GFX11-NEXT: s_lshl_b32 s6, s7, s6
2062
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
2063
- ; GFX11-NEXT: s_cmp_lg_u32 s6, s5
2064
- ; GFX11-NEXT: s_cselect_b32 s5, 1, 0
2065
1932
; GFX11-NEXT: s_addk_i32 s2, 0xfc10
2066
- ; GFX11-NEXT: s_or_b32 s5, s7, s5
2067
- ; GFX11-NEXT: s_lshl_b32 s6, s2, 12
2068
- ; GFX11-NEXT: s_or_b32 s6, s3, s6
1933
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1934
+ ; GFX11-NEXT: v_or_b32_e32 v0, s5, v0
1935
+ ; GFX11-NEXT: v_med3_i32 v1, s3, 0, 13
1936
+ ; GFX11-NEXT: s_lshl_b32 s3, s2, 12
2069
1937
; GFX11-NEXT: s_cmp_lt_i32 s2, 1
2070
- ; GFX11-NEXT: s_cselect_b32 s5, s5, s6
2071
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2072
- ; GFX11-NEXT: s_and_b32 s6, s5, 7
2073
- ; GFX11-NEXT: s_cmp_gt_i32 s6, 5
2074
- ; GFX11-NEXT: s_cselect_b32 s7, 1, 0
2075
- ; GFX11-NEXT: s_cmp_eq_u32 s6, 3
2076
- ; GFX11-NEXT: s_cselect_b32 s6, 1, 0
2077
- ; GFX11-NEXT: s_lshr_b32 s5, s5, 2
2078
- ; GFX11-NEXT: s_or_b32 s6, s6, s7
2079
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2080
- ; GFX11-NEXT: s_add_i32 s5, s5, s6
1938
+ ; GFX11-NEXT: v_or_b32_e32 v2, 0x1000, v0
1939
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1940
+ ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v1, v2
1941
+ ; GFX11-NEXT: v_lshlrev_b32_e32 v1, v1, v3
1942
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1943
+ ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2
1944
+ ; GFX11-NEXT: v_or_b32_e32 v2, s3, v0
1945
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1946
+ ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
2081
1947
; GFX11-NEXT: s_cmp_lt_i32 s2, 31
2082
- ; GFX11-NEXT: s_movk_i32 s6, 0x7e00
2083
- ; GFX11-NEXT: s_cselect_b32 s5, s5, 0x7c00
2084
- ; GFX11-NEXT: s_cmp_lg_u32 s3, 0
2085
- ; GFX11-NEXT: s_cselect_b32 s3, s6, 0x7c00
1948
+ ; GFX11-NEXT: v_or_b32_e32 v1, v3, v1
1949
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1950
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
1951
+ ; GFX11-NEXT: v_and_b32_e32 v2, 7, v1
1952
+ ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 2, v1
1953
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
1954
+ ; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 5, v2
1955
+ ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1956
+ ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v2
1957
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1958
+ ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
2086
1959
; GFX11-NEXT: s_cmpk_eq_i32 s2, 0x40f
2087
- ; GFX11-NEXT: s_cselect_b32 s2, s3, s5
2088
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2089
- ; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, s2, v0
1960
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1961
+ ; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
1962
+ ; GFX11-NEXT: v_dual_mov_b32 v2, 0x7e00 :: v_dual_add_nc_u32 v1, v1, v2
1963
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1964
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7c00, v1, vcc_lo
1965
+ ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1966
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v2, vcc_lo
1967
+ ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1968
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1969
+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_mov_b32 v1, 0
1970
+ ; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, s4
2090
1971
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
2091
1972
; GFX11-NEXT: s_endpgm
2092
- >>>>>>> 41d8a9928050 (16bit sgpr folding)
2093
1973
%mag.trunc = fptrunc double %mag to half
2094
1974
%result = call half @llvm.copysign.f16 (half %mag.trunc , half %sign )
2095
1975
store half %result , ptr addrspace (1 ) %arg_out
0 commit comments