Skip to content

Commit 409815d

Browse files
authored
AMDGPU: Add subtarget feature for global atomic fadd denormal support (#96443)
Not sure what the behavior for gfx90a is: the SPG says it always flushes denormals, but the instruction documentation says it does not.
1 parent 78dcd02 commit 409815d

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
788788
"Has flat_atomic_add_f32 instruction"
789789
>;
790790

791+
def FeatureMemoryAtomicFAddF32DenormalSupport
792+
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
793+
"HasMemoryAtomicFaddF32DenormalSupport",
794+
"true",
795+
"global/flat/buffer atomic fadd for float supports denormal handling"
796+
>;
797+
791798
def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
792799
: SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
793800
"HasAgentScopeFineGrainedRemoteMemoryAtomics",
@@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14271434
FeatureKernargPreload,
14281435
FeatureAtomicFMinFMaxF64GlobalInsts,
14291436
FeatureAtomicFMinFMaxF64FlatInsts,
1430-
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
1437+
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
1438+
FeatureMemoryAtomicFAddF32DenormalSupport
14311439
]>;
14321440

14331441
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1539,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet<
15391547
FeatureFlatAtomicFaddF32Inst,
15401548
FeatureImageInsts,
15411549
FeaturePackedTID,
1542-
FeatureVcmpxPermlaneHazard]>;
1550+
FeatureVcmpxPermlaneHazard,
1551+
FeatureMemoryAtomicFAddF32DenormalSupport]>;
15431552

15441553
// There are few workarounds that need to be
15451554
// added to all targets. This pessimizes codegen
@@ -1631,7 +1640,9 @@ def FeatureISAVersion12 : FeatureSet<
16311640
FeatureScalarDwordx3Loads,
16321641
FeatureDPPSrc1SGPR,
16331642
FeatureMaxHardClauseLength32,
1634-
Feature1_5xVGPRs]>;
1643+
Feature1_5xVGPRs,
1644+
FeatureMemoryAtomicFAddF32DenormalSupport
1645+
]>;
16351646

16361647
def FeatureISAVersion12_Generic: FeatureSet<
16371648
!listconcat(FeatureISAVersion12.Features,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
167167
bool HasAtomicFlatPkAdd16Insts = false;
168168
bool HasAtomicFaddRtnInsts = false;
169169
bool HasAtomicFaddNoRtnInsts = false;
170+
bool HasMemoryAtomicFaddF32DenormalSupport = false;
170171
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
171172
bool HasAtomicBufferGlobalPkAddF16Insts = false;
172173
bool HasAtomicCSubNoRtnInsts = false;
@@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
872873

873874
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
874875

876+
/// \return true if the target's flat, global, and buffer atomic fadd for
877+
/// float supports denormal handling.
878+
bool hasMemoryAtomicFaddF32DenormalSupport() const {
879+
return HasMemoryAtomicFaddF32DenormalSupport;
880+
}
881+
875882
/// \return true if atomic operations targeting fine-grained memory work
876883
/// correctly at device scope, in allocations in host or peer PCIe device
877884
/// memory.

0 commit comments

Comments
 (0)