Commit 4a25e2c

[LIBCLC] Extend __CLC_NVVM_ATOMIC with generic AS
1 parent 98dda9d commit 4a25e2c

1 file changed: +64 -54 lines

libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h

Lines changed: 64 additions & 54 deletions
@@ -72,62 +72,72 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
   } \
 }
 
-#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
-                               OP, NAME_MANGLED, ADDR_SPACE, \
-                               ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \
-  _CLC_DECL TYPE \
-      NAME_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
-          volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
-          enum MemorySemanticsMask semantics, TYPE value) { \
-    /* Semantics mask may include memory order, storage class and other info \
-       Memory order is stored in the lowest 5 bits */ \
-    unsigned int order = semantics & 0x1F; \
-    switch (order) { \
-    case None: \
-      __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                   ADDR_SPACE, ADDR_SPACE_NV, ) \
-      break; \
-    case Acquire: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                     ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
-      } else { \
-        __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
-                                             OP, ADDR_SPACE, ADDR_SPACE_NV) \
-      } \
-      break; \
-    case Release: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                     ADDR_SPACE, ADDR_SPACE_NV, _release) \
-      } else { \
-        __spirv_MemoryBarrier(scope, Release); \
-        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                     ADDR_SPACE, ADDR_SPACE_NV, ) \
-      } \
-      break; \
-    case AcquireRelease: \
-      if (__clc_nvvm_reflect_arch() >= 700) { \
-        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                                     ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
-      } else { \
-        __spirv_MemoryBarrier(scope, Release); \
-        __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
-                                             OP, ADDR_SPACE, ADDR_SPACE_NV) \
-      } \
-      break; \
-    } \
-    __builtin_trap(); \
-    __builtin_unreachable(); \
+#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
+                               OP, NAME_MANGLED, ADDR_SPACE, \
+                               POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \
+  __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION( \
+      TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \
+      ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, 1)
+
+#define __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION( \
+    TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \
+    ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \
+  __attribute__((always_inline)) _CLC_DECL TYPE \
+      NAME_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\
+5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+          volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+          enum MemorySemanticsMask semantics, TYPE value) { \
+    /* Semantics mask may include memory order, storage class and other info \
+       Memory order is stored in the lowest 5 bits */ \
+    unsigned int order = semantics & 0x1F; \
+    switch (order) { \
+    case None: \
+      __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                   ADDR_SPACE, ADDR_SPACE_NV, ) \
+      break; \
+    case Acquire: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                     ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
+      } else { \
+        __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
+                                             OP, ADDR_SPACE, ADDR_SPACE_NV) \
+      } \
+      break; \
+    case Release: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                     ADDR_SPACE, ADDR_SPACE_NV, _release) \
+      } else { \
+        __spirv_MemoryBarrier(scope, Release); \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                     ADDR_SPACE, ADDR_SPACE_NV, ) \
+      } \
+      break; \
+    case AcquireRelease: \
+      if (__clc_nvvm_reflect_arch() >= 700) { \
+        __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
+                                     ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
+      } else { \
+        __spirv_MemoryBarrier(scope, Release); \
+        __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
+                                             OP, ADDR_SPACE, ADDR_SPACE_NV) \
+      } \
+      break; \
+    } \
+    __builtin_trap(); \
+    __builtin_unreachable(); \
   }
 
 #define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
                           NAME_MANGLED) \
-  __attribute__((always_inline)) \
-  __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                         NAME_MANGLED, __global, AS1, _global_) \
-  __attribute__((always_inline)) \
-  __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
-                         NAME_MANGLED, __local, AS3, _shared_)
-
+  __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION(TYPE, TYPE_MANGLED, TYPE_NV, \
+                                      TYPE_MANGLED_NV, OP, NAME_MANGLED, \
+                                      __global, PU3AS1, _global_, 1) \
+  __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION(TYPE, TYPE_MANGLED, TYPE_NV, \
+                                      TYPE_MANGLED_NV, OP, NAME_MANGLED, \
+                                      __local, PU3AS3, _shared_, 1) \
+  __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION(TYPE, TYPE_MANGLED, TYPE_NV, \
+                                      TYPE_MANGLED_NV, OP, NAME_MANGLED, , P, \
+                                      _gen_, 0)
 #endif
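
Note (not part of the commit): the new SUBSTITUTION parameter exists because the three overloads mangle differently under the Itanium C++ ABI. For an address-space-qualified pointer such as PU3AS1i, the qualified pointee and the pointer each occupy a substitution slot, so the later back-reference to the __spv namespace compresses to S1_; for the generic overload the parameter mangles as a plain Pi, one slot fewer, and __spv becomes S0_. The sketch below is illustrative only: it assumes a hypothetical instantiation __CLC_NVVM_ATOMIC(int, i, int, i, add, _Z18__spirv_AtomicIAdd), uses stub enums in place of the real libclc SPIR-V headers, and elides bodies and the always_inline attribute. It shows the signatures the reworked macro would paste together for the __global, __local, and (new) generic address spaces.

/* Hedged sketch in OpenCL C, not taken from the commit. The enum stubs stand
 * in for libclc's real Scope/MemorySemanticsMask definitions. */
enum Scope { ScopeStub };
enum MemorySemanticsMask { SemanticsStub };

/* __global overload: pointer mangles as PU3AS1i, so __spv is later
 * back-referenced as S1_ (SUBSTITUTION == 1). */
int _Z18__spirv_AtomicIAddPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(
    volatile __global int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics, int value);

/* __local overload: same substitution pattern with PU3AS3i. */
int _Z18__spirv_AtomicIAddPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(
    volatile __local int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics, int value);

/* Generic-AS overload added by this commit: the pointer mangles as plain Pi,
 * one substitution candidate fewer, so __spv is referenced as S0_
 * (SUBSTITUTION == 0). */
int _Z18__spirv_AtomicIAddPiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagEi(
    volatile int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics, int value);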

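A second note on the unchanged macro body: the switch operates on semantics & 0x1F because SPIR-V's MemorySemantics mask keeps the ordering flags in the low five bits, with storage-class flags higher up. A minimal sketch of that assumption follows, with bit values taken from the SPIR-V specification rather than from this header; libclc's own headers provide the real enum.

/* Hedged sketch: SPIR-V MemorySemantics bits per the SPIR-V specification. */
enum MemorySemanticsSketch {
  SemNone = 0x0,
  SemAcquire = 0x2,
  SemRelease = 0x4,
  SemAcquireRelease = 0x8,
  SemSequentiallyConsistent = 0x10,
  /* Storage-class bits sit above bit 4 and are discarded by the mask: */
  SemUniformMemory = 0x40,
  SemWorkgroupMemory = 0x100,
  SemCrossWorkgroupMemory = 0x200,
};

/* semantics & 0x1F keeps only the five ordering bits, mirroring the
 * "order" variable computed inside __CLC_NVVM_ATOMIC_IMPL_SUBSTITUTION. */
static unsigned int extract_order(unsigned int semantics) {
  return semantics & 0x1F;
}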