Skip to content

[SYCL][HIP] Add gfx9+ hip atomics #8170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Disables NVVM reflection to defer to after linking
set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72"
"SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false")
elseif( ${ARCH} STREQUAL amdgcn )
# AMDGCN: libclc must be compiled for a sufficiently recent target (gfx940
# below) so that all of the clang atomic builtins are accessible
set( flags "SHELL:-mcpu=gfx940")
else()
set ( flags )
endif()
Expand Down
41 changes: 36 additions & 5 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,34 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

extern constant int __oclc_ISA_version;

AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)

#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
// Defines the float overload of __spirv_AtomicFAddEXT for one address space.
//   AS          - address-space qualifier applied to the pointer parameter
//   AS_MANGLED  - text pasted into the mangled function name for that space
//   SUB1        - token pasted into the mangled name after "NS"
//                 (presumably an Itanium substitution index - TODO confirm)
//   CHECK       - condition that selects the direct-builtin path
//   NEW_BUILTIN - builtin invoked as NEW_BUILTIN(p, val) when CHECK is true
// When CHECK holds, the value of NEW_BUILTIN(p, val) is returned and the
// `scope` / `semantics` arguments are not consulted. Otherwise they are
// translated by GET_ATOMIC_SCOPE_AND_ORDER and the addition is performed with
// __hip_atomic_fetch_add.
#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \
_CLC_DEF float \
_Z21__spirv_AtomicFAddEXTP##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \
volatile AS float *p, enum Scope scope, \
enum MemorySemanticsMask semantics, float val) { \
if (CHECK) \
return NEW_BUILTIN(p, val); \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
}

// Instantiate the float atomic add once per address space. Each instance
// passes its own ISA-version condition and the builtin to use when that
// condition holds.
// NOTE(review): the numeric bounds presumably encode gfx targets (e.g. 9010
// for gfx90a, 9400 for gfx940) - confirm against the __oclc_ISA_version
// encoding.

// global address space
AMDGPU_ATOMIC_FP32_ADD_IMPL(global, U3AS1, 1, AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_global_atomic_fadd_f32)
// local address space
AMDGPU_ATOMIC_FP32_ADD_IMPL(local, U3AS3, 1, AMDGPU_ARCH_GEQ(8000),
__builtin_amdgcn_ds_atomic_fadd_f32)
// no address-space qualifier
AMDGPU_ATOMIC_FP32_ADD_IMPL(, , 0, AMDGPU_ARCH_BETWEEN(9400, 10000),
__builtin_amdgcn_flat_atomic_fadd_f32)

#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2, CHECK, \
NEW_BUILTIN) \
_CLC_DEF long \
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
volatile AS long *, enum Scope, enum MemorySemanticsMask, \
Expand All @@ -28,6 +49,8 @@ AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
_Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
volatile AS double *p, enum Scope scope, \
enum MemorySemanticsMask semantics, double val) { \
if (CHECK) \
return NEW_BUILTIN(p, val); \
int atomic_scope = 0, memory_order = 0; \
volatile AS long *int_pointer = (volatile AS long *)p; \
long old_int_val = 0, new_int_val = 0; \
Expand All @@ -46,12 +69,20 @@ AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
}

#ifdef cl_khr_int64_base_atomics
AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_global_atomic_fadd_f64)
AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_ds_atomic_fadd_f64)
AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4, AMDGPU_ARCH_BETWEEN(9400, 10000),
__builtin_amdgcn_flat_atomic_fadd_f64)
#endif

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_FP32_ADD_IMPL
#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_and.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ AMDGPU_ATOMIC(_Z17__spirv_AtomicAnd, unsigned long, m, __hip_atomic_fetch_and)

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,6 @@ AMDGPU_ATOMIC_CMPXCHG(float, f)
#undef AMDGPU_ATOMIC_IMPL
// The spelling must match the AMDGPU_ATOMIC_CMPXCHG macro used in this file;
// the earlier "CPMXCHG" spelling named no existing macro, so nothing was
// actually undefined.
#undef AMDGPU_ATOMIC_CMPXCHG
#undef AMDGPU_ATOMIC_CMPXCHG_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
20 changes: 11 additions & 9 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// ISA-version predicates. Both expand to an expression that reads
// __oclc_ISA_version, which the including file must declare (for example
// `extern constant int __oclc_ISA_version;`).
//
// AMDGPU_ARCH_GEQ(LOWER)            - true when the version is >= LOWER.
// AMDGPU_ARCH_BETWEEN(LOWER, UPPER) - true when LOWER <= version < UPPER.
//
// The arguments and the whole expansion are parenthesised so the macros stay
// correct when negated, combined with other operators, or given compound
// arguments.
#define AMDGPU_ARCH_GEQ(LOWER) (__oclc_ISA_version >= (LOWER))
#define AMDGPU_ARCH_BETWEEN(LOWER, UPPER)                                      \
  (__oclc_ISA_version >= (LOWER) && __oclc_ISA_version < (UPPER))

#define GET_ATOMIC_SCOPE_AND_ORDER(IN_SCOPE, OUT_SCOPE, IN_SEMANTICS, \
OUT_ORDER) \
{ \
Expand Down Expand Up @@ -52,20 +56,18 @@
} \
}

#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
SUB1, BUILTIN) \
_CLC_DEF TYPE \
_CLC_DEF TYPE \
FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
volatile AS TYPE *p, enum Scope scope, \
enum MemorySemanticsMask semantics, TYPE val) { \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
TYPE ret = BUILTIN(p, val, memory_order, atomic_scope); \
return *(TYPE *)&ret; \
volatile AS TYPE *p, enum Scope scope, \
enum MemorySemanticsMask semantics, TYPE val) { \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
return BUILTIN(p, val, memory_order, atomic_scope); \
}

// Expands AMDGPU_ATOMIC_IMPL three times for one (FUNC_NAME, TYPE) pair:
// once with the `global` qualifier (mangled as U3AS1), once with the `local`
// qualifier (mangled as U3AS3), and once with no address-space qualifier.
// FUNC_NAME, TYPE, TYPE_MANGLED and BUILTIN are forwarded unchanged to every
// expansion; the 1 / 1 / 0 argument is passed as SUB1.
#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)

5 changes: 3 additions & 2 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
enum MemorySemanticsMask semantics) { \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
TYPE res = __hip_atomic_load(p, memory_order, atomic_scope); \
return *(TYPE *)&res; \
return __hip_atomic_load(p, memory_order, atomic_scope); \
}

#define AMDGPU_ATOMIC_LOAD(TYPE, TYPE_MANGLED) \
Expand All @@ -38,4 +37,6 @@ AMDGPU_ATOMIC_LOAD(float, Kf)
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_LOAD
#undef AMDGPU_ATOMIC_LOAD_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
23 changes: 17 additions & 6 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,33 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

extern constant int __oclc_ISA_version;

AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max)
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max)
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max)
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max)

AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii, false, )

#ifdef cl_khr_int64_base_atomics
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_global_atomic_fmax_f64)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_flat_atomic_fmax_f64)
#endif

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER
23 changes: 17 additions & 6 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,33 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

extern constant int __oclc_ISA_version;

AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min)
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min)
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min)
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min)

AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii, false, )

#ifdef cl_khr_int64_base_atomics
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_global_atomic_fmin_f64)
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll,
false, )
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll,
AMDGPU_ARCH_BETWEEN(9010, 10000),
__builtin_amdgcn_flat_atomic_fmin_f64)
#endif

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef AMDGPU_ATOMIC_FP_MINMAX_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER
9 changes: 5 additions & 4 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, TYPE_MANGLED, \
STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
AS_MANGLED, SUB1, SUB2) \
#define AMDGPU_ATOMIC_FP_MINMAX_IMPL( \
OPNAME, OP, TYPE, TYPE_MANGLED, STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
AS_MANGLED, SUB1, SUB2, CHECK, NEW_BUILTIN) \
_CLC_DEF STORAGE_TYPE \
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \
Expand All @@ -26,6 +26,8 @@
_Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
volatile AS TYPE *p, enum Scope scope, \
enum MemorySemanticsMask semantics, TYPE val) { \
if (CHECK) \
return NEW_BUILTIN(p, val); \
int atomic_scope = 0, memory_order = 0; \
volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \
STORAGE_TYPE old_int_val = 0, new_int_val = 0; \
Expand All @@ -45,4 +47,3 @@
\
return old_val; \
}

2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_or.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ AMDGPU_ATOMIC(_Z16__spirv_AtomicOr, unsigned long, m, __hip_atomic_fetch_or)

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ AMDGPU_ATOMIC_STORE(float, f)
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_STORE
#undef AMDGPU_ATOMIC_STORE_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
5 changes: 3 additions & 2 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_sub.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
enum MemorySemanticsMask semantics, TYPE val) { \
int atomic_scope = 0, memory_order = 0; \
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
TYPE ret = BUILTIN(p, val, memory_order); \
return *(TYPE *)&ret; \
return BUILTIN(p, val, memory_order); \
}

#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \
Expand All @@ -39,4 +38,6 @@ AMDGPU_ATOMIC_SUB(_Z21__spirv_AtomicFSubEXT, float, f, __atomic_fetch_sub)
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ATOMIC_SUB
#undef AMDGPU_ATOMIC_SUB_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, __hip_atomic_exchange)

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER
2 changes: 2 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
#undef AMDGPU_ARCH_GEQ
#undef AMDGPU_ARCH_BETWEEN
#undef GET_ATOMIC_SCOPE_AND_ORDER