|
8 | 8 |
|
9 | 9 | #include <clc/clc.h>
|
10 | 10 | #include <spirv/spirv.h>
|
| 11 | +#include <spirv/spirv_types.h> |
11 | 12 |
|
12 |
| -void __clc_amdgcn_s_waitcnt(unsigned flags); |
// BUILTIN_FENCE(semantics, scope_memory)
//
// Expands to an if/else chain that issues exactly one __builtin_amdgcn_fence
// and then returns from the enclosing function, which must return void.
//
//   semantics    - bit mask tested against the Acquire, Release,
//                  AcquireRelease and SequentiallyConsistent constants, in
//                  that order; the first bit found selects the __ATOMIC_*
//                  ordering. If none is set, __ATOMIC_ACQ_REL is used.
//                  The argument is evaluated up to four times, so it must not
//                  have side effects.
//   scope_memory - synchronization scope passed straight to the builtin.
//                  It is deliberately left unparenthesised because call sites
//                  pass a string literal here.
//
// The expansion ends in a closing brace, so it forms a complete statement
// whether or not the call site adds a trailing semicolon.
#define BUILTIN_FENCE(semantics, scope_memory)                                 \
  if ((semantics) & Acquire) {                                                 \
    __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory);                    \
    return;                                                                    \
  } else if ((semantics) & Release) {                                          \
    __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory);                    \
    return;                                                                    \
  } else if ((semantics) & AcquireRelease) {                                   \
    __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);                    \
    return;                                                                    \
  } else if ((semantics) & SequentiallyConsistent) {                           \
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory);                    \
    return;                                                                    \
  } else {                                                                     \
    __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);                    \
    return;                                                                    \
  }
13 | 24 |
|
14 |
| -// s_waitcnt takes 16bit argument with a combined number of maximum allowed |
15 |
| -// pending operations: |
16 |
| -// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages |
17 |
| -// [7] -- undefined |
18 |
| -// [6:4] -- exports, GDS, and mem write |
19 |
| -// [3:0] -- vector memory operations |
20 |
| - |
21 |
| -// Newer clang supports __builtin_amdgcn_s_waitcnt |
22 |
| -#if __clang_major__ >= 5 |
23 |
| -#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) |
24 |
| -#else |
25 |
| -#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) |
26 |
| -_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); |
27 |
| -#endif |
28 |
| - |
29 |
| -_CLC_DEF _CLC_OVERLOAD void __mem_fence(cl_mem_fence_flags flags) { |
30 |
| - if (flags & CLK_GLOBAL_MEM_FENCE) { |
31 |
| - // scalar loads are counted with LGKM but we don't know whether |
32 |
| - // the compiler turned any loads to scalar |
33 |
| - __waitcnt(0); |
34 |
| - } else if (flags & CLK_LOCAL_MEM_FENCE) |
35 |
| - __waitcnt(0xff); // LGKM is [12:8] |
| 25 | +_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory, |
| 26 | + unsigned int semantics) { |
| 27 | + switch ((enum Scope)scope_memory) { |
| 28 | + case CrossDevice: |
| 29 | + BUILTIN_FENCE(semantics, "") |
| 30 | + case Device: |
| 31 | + BUILTIN_FENCE(semantics, "agent") |
| 32 | + case Workgroup: |
| 33 | + BUILTIN_FENCE(semantics, "workgroup") |
| 34 | + case Subgroup: |
| 35 | + BUILTIN_FENCE(semantics, "wavefront") |
| 36 | + case Invocation: |
| 37 | + BUILTIN_FENCE(semantics, "singlethread") |
| 38 | + } |
36 | 39 | }
|
37 |
| -#undef __waitcnt |
| 40 | +#undef BUILTIN_FENCE |
38 | 41 |
|
39 |
| -_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory, |
| 42 | +_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory, |
40 | 43 | unsigned int semantics) {
|
41 |
| - __mem_fence(memory); |
| 44 | + __mem_fence(scope_memory, semantics); |
42 | 45 | }
|
43 | 46 |
|
44 | 47 | _CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
|
45 |
| -__spirv_ControlBarrier(unsigned int scope, unsigned int memory, |
| 48 | +__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory, |
46 | 49 | unsigned int semantics) {
|
47 | 50 | if (semantics) {
|
48 |
| - __mem_fence(memory); |
| 51 | + __mem_fence(scope_memory, semantics); |
49 | 52 | }
|
50 | 53 | __builtin_amdgcn_s_barrier();
|
51 | 54 | }
|
0 commit comments