Skip to content

Commit 2da0565

Browse files
committed
AMDGPU: Add a subtarget feature for fine-grained remote memory support
Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work.
1 parent 91bc4d5 commit 2da0565

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst
788788
"Has flat_atomic_add_f32 instruction"
789789
>;
790790

791+
// Subtarget feature for targets where atomic access to fine-grained remote
// memory is expected to work; this does not work on all subtargets.
// The string arguments are, in order: the feature name
// ("agent-scope-fine-grained-remote-memory-atomics"), the name of the
// subtarget field it is associated with, the value given for that field, and
// the human-readable description.
// NOTE(review): argument roles follow the generic SubtargetFeature class;
// confirm against its definition in Target.td.
def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
792+
: SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
793+
"HasAgentScopeFineGrainedRemoteMemoryAtomics",
794+
"true",
795+
"Agent (device) scoped atomic operations not directly supported by "
796+
"PCIe work for allocations in host or peer PCIe device memory"
797+
>;
798+
791799
def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
792800
"HasDefaultComponentZero",
793801
"true",
@@ -1207,7 +1215,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
12071215
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
12081216
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
12091217
FeatureMaxHardClauseLength32,
1210-
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
1218+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1219+
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
12111220
]
12121221
>;
12131222

@@ -1415,7 +1424,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14151424
FeatureBackOffBarrier,
14161425
FeatureKernargPreload,
14171426
FeatureAtomicFMinFMaxF64GlobalInsts,
1418-
FeatureAtomicFMinFMaxF64FlatInsts
1427+
FeatureAtomicFMinFMaxF64FlatInsts,
1428+
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
14191429
]>;
14201430

14211431
def FeatureISAVersion9_4_0 : FeatureSet<

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
174174
bool HasAtomicBufferPkAddBF16Inst = false;
175175
bool HasFlatAtomicFaddF32Inst = false;
176176
bool HasDefaultComponentZero = false;
177+
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
177178
bool HasDefaultComponentBroadcast = false;
178179
/// The maximum number of instructions that may be placed within an S_CLAUSE,
179180
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
@@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
871872

872873
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
873874

875+
/// Accessor for the HasAgentScopeFineGrainedRemoteMemoryAtomics flag.
///
/// \return true if atomic operations targeting fine-grained memory work
876+
/// correctly at agent (device) scope, in allocations in host or peer PCIe
877+
/// device memory.
878+
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const {
879+
return HasAgentScopeFineGrainedRemoteMemoryAtomics;
880+
}
881+
874882
bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
875883

876884
bool hasDefaultComponentBroadcast() const {

0 commit comments

Comments
 (0)