Skip to content

Commit 7784209

Browse files
author
Hugh Delaney
committed
WIP newer HIP atomics
1 parent 361a456 commit 7784209

File tree

4 files changed

+50 additions, -29 deletions

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_add.cl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@ AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
1717
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
1818
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
1919

20-
#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \
21-
_CLC_DEF float \
22-
_Z21__spirv_AtomicFAddEXT##P##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \
23-
volatile AS float *p, enum Scope scope, \
24-
enum MemorySemanticsMask semantics, float val) { \
25-
if (CHECK) { \
26-
float ret = NEW_BUILTIN(p, val); \
27-
return *(float *)&ret; \
28-
} \
29-
int atomic_scope = 0, memory_order = 0; \
30-
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
31-
float ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
32-
return *(float *)&ret; \
20+
#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \
21+
_CLC_DEF float \
22+
_Z21__spirv_AtomicFAddEXTP##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \
23+
volatile AS float *p, enum Scope scope, \
24+
enum MemorySemanticsMask semantics, float val) { \
25+
if (CHECK) { \
26+
float ret = NEW_BUILTIN(p, val); \
27+
return *(float *)&ret; \
28+
} \
29+
int atomic_scope = 0, memory_order = 0; \
30+
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
31+
float ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
32+
return *(float *)&ret; \
3333
}
3434

3535
AMDGPU_ATOMIC_FP32_ADD_IMPL(global, U3AS1, 1, __oclc_ISA_version >= 9010,

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_max.cl

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,28 @@
1111
#include <spirv/spirv.h>
1212
#include <spirv/spirv_types.h>
1313

14+
extern constant int __oclc_ISA_version;
15+
1416
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max)
1517
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max)
1618
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max)
1719
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max)
1820

19-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii)
20-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii)
21-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii)
21+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii,
22+
false, )
23+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii,
24+
false, )
25+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii, false, )
2226

2327
#ifdef cl_khr_int64_base_atomics
24-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll)
25-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll)
26-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll)
28+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll,
29+
__oclc_ISA_version >= 9010,
30+
__builtin_amdgcn_global_atomic_fmax_f64)
31+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll,
32+
false, )
33+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll,
34+
__oclc_ISA_version >= 9010,
35+
__builtin_amdgcn_flat_atomic_fmax_f64)
2736
#endif
2837

2938
#undef AMDGPU_ATOMIC

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_min.cl

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,28 @@
1111
#include <spirv/spirv.h>
1212
#include <spirv/spirv_types.h>
1313

14+
extern constant int __oclc_ISA_version;
15+
1416
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min)
1517
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min)
1618
AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min)
1719
AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min)
1820

19-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii)
20-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii)
21-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii)
21+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii,
22+
false, )
23+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii,
24+
false, )
25+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii, false, )
2226

2327
#ifdef cl_khr_int64_base_atomics
24-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll)
25-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll)
26-
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll)
28+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll,
29+
__oclc_ISA_version >= 9010,
30+
__builtin_amdgcn_global_atomic_fmin_f64)
31+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll,
32+
false, )
33+
AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll,
34+
__oclc_ISA_version >= 9010,
35+
__builtin_amdgcn_flat_atomic_fmin_f64)
2736
#endif
2837

2938
#undef AMDGPU_ATOMIC

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_minmax.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
#include <spirv/spirv.h>
1111
#include <spirv/spirv_types.h>
1212

13-
#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, TYPE_MANGLED, \
14-
STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
15-
AS_MANGLED, SUB1, SUB2) \
13+
#define AMDGPU_ATOMIC_FP_MINMAX_IMPL( \
14+
OPNAME, OP, TYPE, TYPE_MANGLED, STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \
15+
AS_MANGLED, SUB1, SUB2, CHECK, NEW_BUILTIN) \
1616
_CLC_DEF STORAGE_TYPE \
1717
_Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \
1818
volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \
@@ -26,6 +26,10 @@
2626
_Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
2727
volatile AS TYPE *p, enum Scope scope, \
2828
enum MemorySemanticsMask semantics, TYPE val) { \
29+
if (CHECK) { \
30+
TYPE ret = NEW_BUILTIN(p, val); \
31+
return *(TYPE *)&ret; \
32+
} \
2933
int atomic_scope = 0, memory_order = 0; \
3034
volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \
3135
STORAGE_TYPE old_int_val = 0, new_int_val = 0; \
@@ -45,4 +49,3 @@
4549
\
4650
return old_val; \
4751
}
48-

0 commit comments

Comments
 (0)