Skip to content

Commit 3701ebe

Browse files
committed
AtomicExpand: Fix expanding atomics into unconstrained FP in strictfp functions
Ideally, the normal (unconstrained) fadd/fmin/fmax instructions this was creating would fail the verifier. It is probably also necessary to force off FP exception handlers in the cmpxchg loop, but we don't have a generic way to do that now. Note that the strictfp builder is broken in the minnum/maxnum case; see https://reviews.llvm.org/D154993
1 parent b59022b commit 3701ebe

File tree

6 files changed

+432
-189
lines changed

6 files changed

+432
-189
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,6 +1576,11 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
15761576
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
15771577
CreateCmpXchgInstFun CreateCmpXchg) {
15781578
ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1579+
Builder.setIsFPConstrained(
1580+
AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1581+
1582+
// FIXME: If FP exceptions are observable, we should force them off for the
1583+
// loop for the FP atomics.
15791584
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
15801585
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
15811586
AI->getOrdering(), AI->getSyncScopeID(),

llvm/lib/Transforms/Utils/LowerAtomic.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
101101

102102
bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
103103
IRBuilder<> Builder(RMWI);
104+
Builder.setIsFPConstrained(
105+
RMWI->getFunction()->hasFnAttribute(Attribute::StrictFP));
106+
104107
Value *Ptr = RMWI->getPointerOperand();
105108
Value *Val = RMWI->getValOperand();
106109

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,4 +1901,176 @@ define float @test_atomicrmw_fadd_f32_global_one_as(ptr addrspace(1) %ptr, float
19011901
ret float %res
19021902
}
19031903

1904+
define void @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(ptr addrspace(1) %ptr, float %value) #1 {
1905+
; CI-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1906+
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
1907+
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
1908+
; CI: atomicrmw.start:
1909+
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1910+
; CI-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1911+
; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1912+
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1913+
; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
1914+
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1915+
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1916+
; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1917+
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1918+
; CI: atomicrmw.end:
1919+
; CI-NEXT: ret void
1920+
;
1921+
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1922+
; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
1923+
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
1924+
; GFX9: atomicrmw.start:
1925+
; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1926+
; GFX9-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1927+
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1928+
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1929+
; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 4
1930+
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1931+
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1932+
; GFX9-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1933+
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1934+
; GFX9: atomicrmw.end:
1935+
; GFX9-NEXT: ret void
1936+
;
1937+
; GFX908-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1938+
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1939+
; GFX908-NEXT: ret void
1940+
;
1941+
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1942+
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1943+
; GFX90A-NEXT: ret void
1944+
;
1945+
; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1946+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1947+
; GFX940-NEXT: ret void
1948+
;
1949+
; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_structfp(
1950+
; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4
1951+
; GFX11-NEXT: ret void
1952+
;
1953+
%res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("wavefront") monotonic
1954+
ret void
1955+
}
1956+
1957+
define double @test_atomicrmw_fadd_f64_global_unsafe_strictfp(ptr addrspace(1) %ptr, double %value) #1 {
1958+
; CI-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1959+
; CI-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1960+
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
1961+
; CI: atomicrmw.start:
1962+
; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1963+
; CI-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
1964+
; CI-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1965+
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1966+
; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1967+
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1968+
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1969+
; CI-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
1970+
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1971+
; CI: atomicrmw.end:
1972+
; CI-NEXT: ret double [[TMP5]]
1973+
;
1974+
; GFX9-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1975+
; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1976+
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
1977+
; GFX9: atomicrmw.start:
1978+
; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1979+
; GFX9-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
1980+
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1981+
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1982+
; GFX9-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1983+
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1984+
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1985+
; GFX9-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
1986+
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1987+
; GFX9: atomicrmw.end:
1988+
; GFX9-NEXT: ret double [[TMP5]]
1989+
;
1990+
; GFX908-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
1991+
; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
1992+
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1993+
; GFX908: atomicrmw.start:
1994+
; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1995+
; GFX908-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
1996+
; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1997+
; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1998+
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
1999+
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
2000+
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
2001+
; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
2002+
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2003+
; GFX908: atomicrmw.end:
2004+
; GFX908-NEXT: ret double [[TMP5]]
2005+
;
2006+
; GFX90A-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2007+
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
2008+
; GFX90A-NEXT: ret double [[RES]]
2009+
;
2010+
; GFX940-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2011+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8
2012+
; GFX940-NEXT: ret double [[RES]]
2013+
;
2014+
; GFX11-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp(
2015+
; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
2016+
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2017+
; GFX11: atomicrmw.start:
2018+
; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2019+
; GFX11-NEXT: [[NEW:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]]
2020+
; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
2021+
; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
2022+
; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("wavefront") monotonic monotonic, align 8
2023+
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
2024+
; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
2025+
; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
2026+
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2027+
; GFX11: atomicrmw.end:
2028+
; GFX11-NEXT: ret double [[TMP5]]
2029+
;
2030+
%res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("wavefront") monotonic
2031+
ret double %res
2032+
}
2033+
2034+
define float @test_atomicrmw_fadd_f32_local_strictfp(ptr addrspace(3) %ptr, float %value) #2 {
2035+
; CI-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2036+
; CI-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[PTR:%.*]], align 4
2037+
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
2038+
; CI: atomicrmw.start:
2039+
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2040+
; CI-NEXT: [[NEW:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LOADED]], float [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
2041+
; CI-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2042+
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2043+
; CI-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4
2044+
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2045+
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2046+
; CI-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2047+
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2048+
; CI: atomicrmw.end:
2049+
; CI-NEXT: ret float [[TMP5]]
2050+
;
2051+
; GFX9-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2052+
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2053+
; GFX9-NEXT: ret float [[RES]]
2054+
;
2055+
; GFX908-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2056+
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2057+
; GFX908-NEXT: ret float [[RES]]
2058+
;
2059+
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2060+
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2061+
; GFX90A-NEXT: ret float [[RES]]
2062+
;
2063+
; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2064+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2065+
; GFX940-NEXT: ret float [[RES]]
2066+
;
2067+
; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_strictfp(
2068+
; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
2069+
; GFX11-NEXT: ret float [[RES]]
2070+
;
2071+
%res = atomicrmw fadd ptr addrspace(3) %ptr, float %value seq_cst
2072+
ret float %res
2073+
}
19042074
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
2075+
attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
2076+
attributes #2 = { strictfp}

0 commit comments

Comments
 (0)