Skip to content

Commit 4acca90

Browse files
GeorgeWeb and kbenzie authored
[CLC][AMDGPU] Refactor fence helper to process order semantic explicitly (#12872)
This PR refactors the builtin fence helper macro for AMDGPU so that it takes and processes the order semantic explicitly, because that is the only semantic argument accepted by the amdgcn builtin. Additionally, it makes the `None` (Monotonic) order semantic, which maps to C++/SYCL's `relaxed`, a no-op instead of falling back to the previous `acq_rel` default order. --------- Co-authored-by: Kenneth Benzie (Benie) <[email protected]>
1 parent 32911b2 commit 4acca90

File tree

2 files changed

+40
-20
lines changed

2 files changed

+40
-20
lines changed

libclc/amdgcn-amdhsa/libspirv/synchronization/barrier.cl

Lines changed: 33 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -10,42 +10,56 @@
1010
#include <spirv/spirv.h>
1111
#include <spirv/spirv_types.h>
1212

13-
#define BUILTIN_FENCE(semantics, scope_memory) \
14-
if (semantics & Acquire) \
15-
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory); \
16-
else if (semantics & Release) \
17-
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory); \
18-
else if (semantics & AcquireRelease) \
19-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
20-
else if (semantics & SequentiallyConsistent) \
21-
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory); \
22-
else \
23-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);
2413

25-
_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory,
26-
unsigned int semantics) {
14+
#define BUILTIN_FENCE(order, scope_memory) \
15+
/* None implies Monotonic (for llvm/AMDGPU), or relaxed in C++. \
16+
* This does not make sense as an ordering argument for a fence instruction \
17+
* and is not part of the supported orderings for a fence in AMDGPU. */ \
18+
if (order != None) { \
19+
switch (order) { \
20+
case Acquire: \
21+
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory); \
22+
case Release: \
23+
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory); \
24+
case AcquireRelease: \
25+
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
26+
case SequentiallyConsistent: \
27+
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory); \
28+
default: \
29+
__builtin_trap(); \
30+
__builtin_unreachable(); \
31+
} \
32+
}
33+
34+
_CLC_INLINE void builtin_fence_order(unsigned int scope_memory,
35+
unsigned int order) {
2736
switch ((enum Scope)scope_memory) {
2837
case CrossDevice:
29-
BUILTIN_FENCE(semantics, "")
38+
BUILTIN_FENCE(order, "")
3039
case Device:
31-
BUILTIN_FENCE(semantics, "agent")
40+
BUILTIN_FENCE(order, "agent")
3241
case Workgroup:
33-
BUILTIN_FENCE(semantics, "workgroup")
42+
BUILTIN_FENCE(order, "workgroup")
3443
case Subgroup:
35-
BUILTIN_FENCE(semantics, "wavefront")
44+
BUILTIN_FENCE(order, "wavefront")
3645
case Invocation:
37-
BUILTIN_FENCE(semantics, "singlethread")
46+
BUILTIN_FENCE(order, "singlethread")
3847
}
3948
}
4049
#undef BUILTIN_FENCE
4150

51+
_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory,
52+
unsigned int semantics) {
53+
builtin_fence_order(scope_memory, semantics & 0x1F);
54+
}
55+
4256
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory,
4357
unsigned int semantics) {
4458
__mem_fence(scope_memory, semantics);
4559
}
4660

4761
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
48-
__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory,
62+
__spirv_ControlBarrier(unsigned int scope_execution, unsigned int scope_memory,
4963
unsigned int semantics) {
5064
if (semantics) {
5165
__mem_fence(scope_memory, semantics);

sycl/plugins/unified_runtime/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
131131

132132
fetch_adapter_source(hip
133133
${UNIFIED_RUNTIME_REPO}
134-
${UNIFIED_RUNTIME_TAG}
134+
# commit 2c4303c25b026f7edb215accdccb1bc5ae2e237b
135+
# Merge: abe85cc9 3e011c70
136+
# Author: Kenneth Benzie (Benie) <[email protected]>
137+
# Date: Thu Jun 13 10:22:34 2024 +0100
138+
# Merge pull request #1414 from GeorgeWeb/georgi/hip-fences
139+
# [HIP] Enable more ordering and scope capabilities for atomic fences
140+
2c4303c25b026f7edb215accdccb1bc5ae2e237b
135141
)
136142

137143
fetch_adapter_source(native_cpu

0 commit comments

Comments
 (0)