 #include <spirv/spirv.h>
 #include <spirv/spirv_types.h>
 
+extern int __oclc_ISA_version;
+
+//extern float __builtin_amdgcn_global_atomic_fadd_f32(global float *, float);
+
 AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
 AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
 AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
 AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
-AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
+//AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)
+
+#define AMDGPU_ATOMIC_FP32_IMPL(AS, AS_MANGLED, SUB1) \
+  _CLC_DEF float \
+  _Z21__spirv_AtomicFAddEXT##P##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \
+      volatile AS float *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics, float val) { \
+    int atomic_scope = 0, memory_order = 0; \
+    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
+    float ret; \
+    if (__oclc_ISA_version > 9010) \
+      ret = __builtin_amdgcn_global_atomic_fadd_f32(p, val); \
+    else \
+      ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
+    return *(float *)&ret; \
+  }
+
+AMDGPU_ATOMIC_FP32_IMPL(global, U3AS1, 1)
+// AMDGPU_ATOMIC_FP32_IMPL(local, U3AS3, 1)
+// AMDGPU_ATOMIC_FP32_IMPL(, , 0)
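For reference, here is a hand-expanded sketch of what AMDGPU_ATOMIC_FP32_IMPL(global, U3AS1, 1) produces (not compiler-verified; the mangled name follows from the token pasting in the macro, and _CLC_DEF, GET_ATOMIC_SCOPE_AND_ORDER, __oclc_ISA_version, and __hip_atomic_fetch_add all come from the headers and declarations above):

    _CLC_DEF float
    _Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
        volatile global float *p, enum Scope scope,
        enum MemorySemanticsMask semantics, float val) {
      int atomic_scope = 0, memory_order = 0;
      GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order)
      float ret;
      if (__oclc_ISA_version > 9010)
        // newer ISAs: use the dedicated global FP32 fadd builtin
        ret = __builtin_amdgcn_global_atomic_fadd_f32(p, val);
      else
        // otherwise fall back to the generic HIP atomic fetch-add
        ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope);
      return *(float *)&ret;
    }

The builtin path is only taken when __oclc_ISA_version reports a new enough target; every other case goes through the same __hip_atomic_fetch_add path used by the integer overloads above.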
 
 #define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
   _CLC_DEF long \