Skip to content

Commit dcbcd74

Browse files
authored
[LIBCLC] Use amdgcn_fence builtin in mem_fence implementation (intel#10133)
1 parent 6314af8 commit dcbcd74

File tree

1 file changed

+31
-28
lines changed
  • libclc/amdgcn-amdhsa/libspirv/synchronization

1 file changed

+31
-28
lines changed

libclc/amdgcn-amdhsa/libspirv/synchronization/barrier.cl

Lines changed: 31 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -8,44 +8,47 @@
88

99
#include <clc/clc.h>
1010
#include <spirv/spirv.h>
11+
#include <spirv/spirv_types.h>
1112

12-
void __clc_amdgcn_s_waitcnt(unsigned flags);
13+
// Emits one memory fence at synchronization scope `scope_memory` (a string
// literal passed straight to __builtin_amdgcn_fence) and then returns from
// the enclosing void function.
//
// The fence ordering is picked from the first memory-semantics bit found in
// `semantics`, tested in this order: Acquire, Release, AcquireRelease,
// SequentiallyConsistent. When none of those bits is set, an acquire-release
// fence is emitted.
//
// Every branch is braced and `semantics` is parenthesized so the expansion is
// safe for any argument expression and cannot capture a following `else`. The
// fence call is a statement of its own, followed by a plain `return;`, rather
// than `return <void expression>;`.
//
// The macro expands to a complete if/else statement: use it without a
// trailing semicolon, and only inside a function returning void.
#define BUILTIN_FENCE(semantics, scope_memory) \
  if ((semantics) & Acquire) { \
    __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory); \
    return; \
  } else if ((semantics) & Release) { \
    __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory); \
    return; \
  } else if ((semantics) & AcquireRelease) { \
    __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
    return; \
  } else if ((semantics) & SequentiallyConsistent) { \
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory); \
    return; \
  } else { \
    __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
    return; \
  }
1324

14-
// s_waitcnt takes 16bit argument with a combined number of maximum allowed
15-
// pending operations:
16-
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
17-
// [7] -- undefined
18-
// [6:4] -- exports, GDS, and mem write
19-
// [3:0] -- vector memory operations
20-
21-
// Newer clang supports __builtin_amdgcn_s_waitcnt
22-
#if __clang_major__ >= 5
23-
#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
24-
#else
25-
#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
26-
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
27-
#endif
28-
29-
_CLC_DEF _CLC_OVERLOAD void __mem_fence(cl_mem_fence_flags flags) {
30-
if (flags & CLK_GLOBAL_MEM_FENCE) {
31-
// scalar loads are counted with LGKM but we don't know whether
32-
// the compiler turned any loads to scalar
33-
__waitcnt(0);
34-
} else if (flags & CLK_LOCAL_MEM_FENCE)
35-
__waitcnt(0xff); // LGKM is [12:8]
25+
_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory,
26+
unsigned int semantics) {
27+
switch ((enum Scope)scope_memory) {
28+
case CrossDevice:
29+
BUILTIN_FENCE(semantics, "")
30+
case Device:
31+
BUILTIN_FENCE(semantics, "agent")
32+
case Workgroup:
33+
BUILTIN_FENCE(semantics, "workgroup")
34+
case Subgroup:
35+
BUILTIN_FENCE(semantics, "wavefront")
36+
case Invocation:
37+
BUILTIN_FENCE(semantics, "singlethread")
38+
}
3639
}
37-
#undef __waitcnt
40+
#undef BUILTIN_FENCE
3841

39-
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory,
42+
// Memory-barrier entry point. On the added ("+") side of this diff it simply
// forwards both arguments, scope_memory and semantics, to __mem_fence; the
// removed ("-") call passed only the first argument.
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory,
4043
unsigned int semantics) {
41-
__mem_fence(memory);
44+
__mem_fence(scope_memory, semantics);
4245
}
4346

4447
// Control-barrier entry point. When `semantics` is non-zero it first calls
// __mem_fence (added side: with scope_memory and semantics), and then always
// calls __builtin_amdgcn_s_barrier(). The first parameter (scope_execution on
// the added side) is not read anywhere in the body.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
45-
__spirv_ControlBarrier(unsigned int scope, unsigned int memory,
48+
__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory,
4649
unsigned int semantics) {
4750
// Fence only when at least one semantics bit is set.
if (semantics) {
48-
__mem_fence(memory);
51+
__mem_fence(scope_memory, semantics);
4952
}
5053
// The barrier itself is issued unconditionally.
__builtin_amdgcn_s_barrier();
5154
}

0 commit comments

Comments
 (0)