@@ -1901,4 +1901,176 @@ define float @test_atomicrmw_fadd_f32_global_one_as(ptr addrspace(1) %ptr, float
1901
1901
ret float %res
1902
1902
}
1903
1903
1904
+ define void @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp (ptr addrspace (1 ) %ptr , float %value ) #1 { ; f32 atomicrmw fadd on an addrspace(1) pointer under attribute group #1 (strictfp plus "amdgpu-unsafe-fp-atomics"="true").
1905
+ ; CI-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1906
+ ; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
1907
+ ; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
1908
+ ; CI: atomicrmw.start:
1909
+ ; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1910
+ ; CI-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1911
+ ; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1912
+ ; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1913
+ ; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
1914
+ ; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1915
+ ; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1916
+ ; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1917
+ ; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1918
+ ; CI: atomicrmw.end:
1919
+ ; CI-NEXT: ret void
1920
+ ;
1921
+ ; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1922
+ ; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
1923
+ ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
1924
+ ; GFX9: atomicrmw.start:
1925
+ ; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1926
+ ; GFX9-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1927
+ ; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1928
+ ; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1929
+ ; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
1930
+ ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1931
+ ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1932
+ ; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1933
+ ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1934
+ ; GFX9: atomicrmw.end:
1935
+ ; GFX9-NEXT: ret void
1936
+ ;
1937
+ ; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1938
+ ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1939
+ ; GFX908-NEXT: ret void
1940
+ ;
1941
+ ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1942
+ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1943
+ ; GFX90A-NEXT: ret void
1944
+ ;
1945
+ ; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1946
+ ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1947
+ ; GFX940-NEXT: ret void
1948
+ ;
1949
+ ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1950
+ ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1951
+ ; GFX11-NEXT: ret void
1952
+ ;
1953
+ %res = atomicrmw fadd ptr addrspace (1 ) %ptr , float %value syncscope("wavefront" ) monotonic ; Per the checks above, the CI and GFX9 prefixes expect a cmpxchg loop around a constrained fadd.f32, while the GFX908, GFX90A, GFX940 and GFX11 prefixes expect this atomicrmw to be kept.
1954
+ ret void ; %res has no users; presumably this is what "no_use" in the test name refers to.
1955
+ } ; NOTE(review): "structfp" in the function name looks like a typo for "strictfp" (the sibling tests spell it "strictfp") - confirm before renaming, since every label check for this function must change with it.
1956
+
1957
+ define double @test_atomicrmw_fadd_f64_global_unsafe_strictfp (ptr addrspace (1 ) %ptr , double %value ) #1 { ; f64 atomicrmw fadd on an addrspace(1) pointer under attribute group #1 (strictfp plus "amdgpu-unsafe-fp-atomics"="true"); the old value is returned.
1958
+ ; CI-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1959
+ ; CI-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1960
+ ; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
1961
+ ; CI: atomicrmw.start:
1962
+ ; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1963
+ ; CI-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
1964
+ ; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1965
+ ; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1966
+ ; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1967
+ ; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1968
+ ; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1969
+ ; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
1970
+ ; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1971
+ ; CI: atomicrmw.end:
1972
+ ; CI-NEXT: ret double [[TMP5]]
1973
+ ;
1974
+ ; GFX9-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1975
+ ; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1976
+ ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
1977
+ ; GFX9: atomicrmw.start:
1978
+ ; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1979
+ ; GFX9-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
1980
+ ; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1981
+ ; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1982
+ ; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1983
+ ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1984
+ ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1985
+ ; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
1986
+ ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1987
+ ; GFX9: atomicrmw.end:
1988
+ ; GFX9-NEXT: ret double [[TMP5]]
1989
+ ;
1990
+ ; GFX908-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1991
+ ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1992
+ ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1993
+ ; GFX908: atomicrmw.start:
1994
+ ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1995
+ ; GFX908-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1996
+ ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1997
+ ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1998
+ ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1999
+ ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
2000
+ ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
2001
+ ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
2002
+ ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2003
+ ; GFX908: atomicrmw.end:
2004
+ ; GFX908-NEXT: ret double [[TMP5]]
2005
+ ;
2006
+ ; GFX90A-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2007
+ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
2008
+ ; GFX90A-NEXT: ret double [[RES]]
2009
+ ;
2010
+ ; GFX940-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2011
+ ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
2012
+ ; GFX940-NEXT: ret double [[RES]]
2013
+ ;
2014
+ ; GFX11-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2015
+ ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
2016
+ ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2017
+ ; GFX11: atomicrmw.start:
2018
+ ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2019
+ ; GFX11-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
2020
+ ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
2021
+ ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
2022
+ ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
2023
+ ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
2024
+ ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
2025
+ ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
2026
+ ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2027
+ ; GFX11: atomicrmw.end:
2028
+ ; GFX11-NEXT: ret double [[TMP5]]
2029
+ ;
2030
+ %res = atomicrmw fadd ptr addrspace (1 ) %ptr , double %value syncscope("wavefront" ) monotonic ; Per the checks above, the CI, GFX9, GFX908 and GFX11 prefixes expect an i64 cmpxchg loop around a constrained fadd.f64, while the GFX90A and GFX940 prefixes expect this atomicrmw to be kept.
2031
+ ret double %res
2032
+ }
2033
+
2034
+ define float @test_atomicrmw_fadd_f32_local_strictfp (ptr addrspace (3 ) %ptr , float %value ) #2 { ; f32 atomicrmw fadd on an addrspace(3) pointer under attribute group #2 (strictfp only); the old value is returned.
2035
+ ; CI-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2036
+ ; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[PTR:%.*]], align 4
2037
+ ; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
2038
+ ; CI: atomicrmw.start:
2039
+ ; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2040
+ ; CI-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
2041
+ ; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2042
+ ; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2043
+ ; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
2044
+ ; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2045
+ ; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2046
+ ; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2047
+ ; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2048
+ ; CI: atomicrmw.end:
2049
+ ; CI-NEXT: ret float [[TMP5]]
2050
+ ;
2051
+ ; GFX9-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2052
+ ; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2053
+ ; GFX9-NEXT: ret float [[RES]]
2054
+ ;
2055
+ ; GFX908-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2056
+ ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2057
+ ; GFX908-NEXT: ret float [[RES]]
2058
+ ;
2059
+ ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2060
+ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2061
+ ; GFX90A-NEXT: ret float [[RES]]
2062
+ ;
2063
+ ; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2064
+ ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2065
+ ; GFX940-NEXT: ret float [[RES]]
2066
+ ;
2067
+ ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2068
+ ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2069
+ ; GFX11-NEXT: ret float [[RES]]
2070
+ ;
2071
+ %res = atomicrmw fadd ptr addrspace (3 ) %ptr , float %value seq_cst ; Per the checks above, only the CI prefix expects a cmpxchg loop around a constrained fadd.f32; the GFX9, GFX908, GFX90A, GFX940 and GFX11 prefixes expect this atomicrmw to be kept.
2072
+ ret float %res
2073
+ }
1904
2074
attributes #0 = { "denormal-fp-math-f32" ="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics" ="true" }
2075
+ attributes #1 = { strictfp "denormal-fp-math-f32" ="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics" ="true" } ; Same string attributes as #0, with strictfp added.
2076
+ attributes #2 = { strictfp} ; strictfp only; sets neither "denormal-fp-math-f32" nor "amdgpu-unsafe-fp-atomics".
0 commit comments