|
10 | 10 | #include <spirv/spirv.h>
|
11 | 11 | #include <spirv/spirv_types.h>
|
12 | 12 |
|
/* Lower a SPIR-V memory-order value to an AMDGCN fence intrinsic for the
 * memory scope given as a sync-scope string.  Expands inside
 * builtin_fence_order(); the `return` statements exit that function once
 * the fence has been issued. */
#define BUILTIN_FENCE(order, scope_memory)                                     \
  /* None implies Monotonic (for llvm/AMDGPU), or relaxed in C++.              \
   * This does not make sense as ordering argument for a fence instruction     \
   * and is not part of the supported orderings for a fence in AMDGPU. */      \
  if (order != None) {                                                         \
    switch (order) {                                                           \
    case Acquire:                                                              \
      return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory);           \
    case Release:                                                              \
      return __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory);           \
    case AcquireRelease:                                                       \
      return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);           \
    case SequentiallyConsistent:                                               \
      return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory);           \
    default:                                                                   \
      /* Invalid/unrecognized ordering value: trap rather than emit a          \
       * fence with the wrong semantics. */                                    \
      __builtin_trap();                                                        \
      __builtin_unreachable();                                                 \
    }                                                                          \
  }

/* Issue a memory fence for the given SPIR-V memory scope and memory-order
 * value.  The SPIR-V Scope enum is mapped to the AMDGPU sync-scope string
 * passed to __builtin_amdgcn_fence.
 *
 * NOTE(review): when `order == None` the macro expansion is a no-op, so
 * control deliberately falls through successive scope cases and out of the
 * switch — every case either returns (fence issued) or does nothing. */
_CLC_INLINE void builtin_fence_order(unsigned int scope_memory,
                                     unsigned int order) {
  switch ((enum Scope)scope_memory) {
  case CrossDevice:
    BUILTIN_FENCE(order, "")
  case Device:
    BUILTIN_FENCE(order, "agent")
  case Workgroup:
    BUILTIN_FENCE(order, "workgroup")
  case Subgroup:
    BUILTIN_FENCE(order, "wavefront")
  case Invocation:
    BUILTIN_FENCE(order, "singlethread")
  }
}
#undef BUILTIN_FENCE
|
41 | 50 |
|
| 51 | +_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory, |
| 52 | + unsigned int semantics) { |
| 53 | + builtin_fence_order(scope_memory, semantics & 0x1F); |
| 54 | +} |
| 55 | + |
/* OpMemoryBarrier: order memory accesses per `semantics` at the given
 * memory scope.  Thin delegation to __mem_fence. */
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory,
                                                  unsigned int semantics) {
  __mem_fence(scope_memory, semantics);
}
|
46 | 60 |
|
47 | 61 | _CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
|
48 |
| -__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory, |
| 62 | +__spirv_ControlBarrier(unsigned int scope_execution, unsigned int scope_memory, |
49 | 63 | unsigned int semantics) {
|
50 | 64 | if (semantics) {
|
51 | 65 | __mem_fence(scope_memory, semantics);
|
|
0 commit comments