Commit e99ead8

[LIBCLC] Add support for more generic atomic operations (#7391)
The diffs are quite hard to follow, but in essence this patch brings:
* a new entry implementing the generic address space for multiple `__CLC_NVVM_ATOMIC_XYZ_IMPL` macros, where `XYZ` stands for `CAS`, `INCDEC`, `LOAD`, `MAX`, `MIN`, `STORE` and `SUB`,
* fixes to the names of the mangled functions that the IMPL macros use,
* the rest is just reformatting to 80 columns.

This patch supersedes #5849, but it requires the remangler fixes from #7220.

Fixes: #7658
1 parent ca54ea3 commit e99ead8

File tree: 7 files changed, +298 −264 lines


libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 59 additions & 52 deletions
@@ -69,63 +69,70 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
   } \
   }
 
-#define __CLC_NVVM_ATOMIC_CAS_IMPL( \
-    TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, ADDR_SPACE, \
-    ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \
-  _CLC_DECL TYPE \
-      _Z29__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \
-          volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
-          enum MemorySemanticsMask semantics1, \
-          enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \
-    /* Semantics mask may include memory order, storage class and other info \
-       Memory order is stored in the lowest 5 bits */ \
-    unsigned int order = (semantics1 | semantics2) & 0x1F; \
-    switch (order) { \
-    case None: \
-      __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                       ADDR_SPACE, ADDR_SPACE_NV, ) \
-    case Acquire: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                         ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
-      } else { \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \
-            TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
-      } \
-      break; \
-    case Release: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                         ADDR_SPACE, ADDR_SPACE_NV, _release) \
-      } else { \
-        __spirv_MemoryBarrier(scope, Release); \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                         ADDR_SPACE, ADDR_SPACE_NV, ) \
-      } \
-      break; \
-    case AcquireRelease: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                         ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
-      } else { \
-        __spirv_MemoryBarrier(scope, Release); \
-        __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \
-            TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
-      } \
-      break; \
-    } \
-    __builtin_trap(); \
-    __builtin_unreachable(); \
+// Type __spirv_AtomicCompareExchange(AS Type *P, __spv::Scope::Flag S,
+//                                    __spv::MemorySemanticsMask::Flag E,
+//                                    __spv::MemorySemanticsMask::Flag U,
+//                                    Type V, Type C);
+#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \
+                                   TYPE_MANGLED_NV, OP, OP_MANGLED, \
+                                   ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, \
+                                   ADDR_SPACE_NV, SUBSTITUTION1, SUBSTITUTION2) \
+  __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\
+Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\
+__spv5Scope4FlagENS##SUBSTITUTION1##_19Memory\
+SemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_MANGLED##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+      enum MemorySemanticsMask semantics1, \
+      enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \
+    /* Semantics mask may include memory order, storage class and other info \
+       Memory order is stored in the lowest 5 bits */ \
+    unsigned int order = (semantics1 | semantics2) & 0x1F; \
+    switch (order) { \
+    case None: \
+      __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                       ADDR_SPACE, ADDR_SPACE_NV, ) \
+    case Acquire: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                         ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
+      } else { \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \
+            TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
+      } \
+      break; \
+    case Release: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                         ADDR_SPACE, ADDR_SPACE_NV, _release) \
+      } else { \
+        __spirv_MemoryBarrier(scope, Release); \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                         ADDR_SPACE, ADDR_SPACE_NV, ) \
+      } \
+      break; \
+    case AcquireRelease: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                         ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
+      } else { \
+        __spirv_MemoryBarrier(scope, Release); \
+        __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \
+            TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
+      } \
+      break; \
+    } \
+    __builtin_trap(); \
+    __builtin_unreachable(); \
   }
 
 #define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
                               OP, OP_MANGLED) \
-  __attribute__((always_inline)) \
   __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                             OP_MANGLED, __global, AS1, _global_) \
-  __attribute__((always_inline)) \
-  __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
-                             OP, OP_MANGLED, __local, AS3, _shared_)
+                             OP_MANGLED, __global, PU3AS1, _global_, 1, 5) \
+  __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                             OP_MANGLED, __local, PU3AS3, _shared_, 1, 5) \
+  __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                             OP_MANGLED, , P, _gen_, 0, 4)
 
 __CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)
 __CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange)
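
For illustration only (not part of the patch): instantiating the new macro via `__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)` would emit, among others, a `__global` and a generic entry whose hand-expanded declarations differ only in the pointer mangling (`PU3AS1` vs. plain `P`) and in the Itanium substitution indices passed as `SUBSTITUTION1`/`SUBSTITUTION2`. The names below are derived mechanically from the macro above, so treat them as a sketch rather than verified output:

// __global variant: pointer mangles as PU3AS1, substitutions NS1_ / S5_.
int _Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii(
    volatile __global int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics1, enum MemorySemanticsMask semantics2,
    int cmp, int value);

// Generic variant: pointer mangles as plain P, so the substitution indices
// shift down to NS0_ / S4_; this is what SUBSTITUTION1/SUBSTITUTION2 encode.
int _Z29__spirv_AtomicCompareExchangePiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ii(
    volatile int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics1, enum MemorySemanticsMask semantics2,
    int cmp, int value);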

libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h

Lines changed: 19 additions & 15 deletions
@@ -12,25 +12,29 @@
 #include <spirv/spirv.h>
 #include <spirv/spirv_types.h>
 
-#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \
-                                      ADDR_SPACE, ADDR_SPACE_MANGLED) \
-  TYPE \
-      _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
-          volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \
-          TYPE); \
-  _CLC_DECL TYPE \
-      _Z24__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
-          volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
-          enum MemorySemanticsMask semantics) { \
-    return _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
-        pointer, scope, semantics, VAL); \
+#define __CLC_NVVM_ATOMIC_INCDEC_IMPL( \
+    TYPE, TYPE_MANGLED, OP_MANGLED, VAL, ADDR_SPACE, \
+    POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION) \
+  TYPE _Z21__spirv_\
+AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\
+5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \
+  __attribute__((always_inline)) _CLC_DECL TYPE _Z24__spirv_\
+Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\
+5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \
+      volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+      enum MemorySemanticsMask semantics) { \
+    return _Z21__spirv_\
+AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\
+5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+        pointer, scope, semantics, VAL); \
   }
 
 #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \
-  __attribute__((always_inline)) \
   __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \
-                                AS1) __attribute__((always_inline)) \
+                                PU3AS1, 1) \
   __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \
-                                AS3)
+                                PU3AS3, 1) \
+  __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , P, 0)
 
 #endif
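
Setting the mangling aside, each `__CLC_NVVM_ATOMIC_INCDEC_IMPL` expansion is a thin wrapper that forwards to the corresponding `AtomicIAddEXT` entry point with a fixed operand. A de-macroed sketch of the new generic-address-space case, using simplified placeholder names instead of the real mangled `__spirv_*` symbols (and assuming `VAL` is 1 for an increment), would look like:

// De-macroed sketch of one generic-address-space INCDEC instantiation.
// Placeholder names; the macro actually pastes together mangled symbols.
int atomic_iadd_ext_generic(volatile int *, enum Scope,
                            enum MemorySemanticsMask, int);

__attribute__((always_inline)) int
atomic_iincrement_generic(volatile int *pointer, enum Scope scope,
                          enum MemorySemanticsMask semantics) {
  // Increment is implemented as an atomic add of VAL (here assumed to be 1)
  // through the existing IAdd entry point.
  return atomic_iadd_ext_generic(pointer, scope, semantics, 1);
}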
