#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

- extern int __oclc_ISA_version;
-
- //extern float __builtin_amdgcn_global_atomic_fadd_f32(global float *, float);
+ extern constant int __oclc_ISA_version;

AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add)
AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add)
- //AMDGPU_ATOMIC(_Z21__spirv_AtomicFAddEXT, float, f, __hip_atomic_fetch_add)

- #define AMDGPU_ATOMIC_FP32_IMPL(AS, AS_MANGLED, SUB1) \
+ #define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \
  _CLC_DEF float \
      _Z21__spirv_AtomicFAddEXT##P##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \
          volatile AS float *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, float val) { \
+   if (CHECK) { \
+     float ret = NEW_BUILTIN(p, val); \
+     return *(float *)&ret; \
+   } \
    int atomic_scope = 0, memory_order = 0; \
    GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
-   float ret; \
-   if (__oclc_ISA_version > 9010) \
-     ret = __builtin_amdgcn_global_atomic_fadd_f32(p, val); \
-   else \
-     ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
+   float ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \
    return *(float *)&ret; \
  }

- AMDGPU_ATOMIC_FP32_IMPL(global, U3AS1, 1)
- // AMDGPU_ATOMIC_FP32_IMPL(local, U3AS3, 1)
- // AMDGPU_ATOMIC_FP32_IMPL(, , 0)
+ AMDGPU_ATOMIC_FP32_ADD_IMPL(global, U3AS1, 1, __oclc_ISA_version >= 9010,
+                             __builtin_amdgcn_global_atomic_fadd_f32)
+ AMDGPU_ATOMIC_FP32_ADD_IMPL(local, U3AS3, 1, __oclc_ISA_version >= 8000,
+                             __builtin_amdgcn_ds_atomic_fadd_f32)
+ AMDGPU_ATOMIC_FP32_ADD_IMPL(, , 0, __oclc_ISA_version >= 9400,
+                             __builtin_amdgcn_flat_atomic_fadd_f32)
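For reference, this is roughly what the first (global address space) instantiation above expands to after preprocessing; `_CLC_DEF` and `GET_ATOMIC_SCOPE_AND_ORDER` come from earlier in the library, and the long identifier is just the Itanium-mangled overload name produced by the token pasting. An illustrative sketch, not code from the commit itself:

```c
// Approximate expansion of
//   AMDGPU_ATOMIC_FP32_ADD_IMPL(global, U3AS1, 1, __oclc_ISA_version >= 9010,
//                               __builtin_amdgcn_global_atomic_fadd_f32)
// with whitespace added for readability.
_CLC_DEF float
_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
    volatile global float *p, enum Scope scope,
    enum MemorySemanticsMask semantics, float val) {
  if (__oclc_ISA_version >= 9010) {
    // ISA 9010 (gfx90a) and newer: use the dedicated global-memory fadd builtin.
    float ret = __builtin_amdgcn_global_atomic_fadd_f32(p, val);
    return *(float *)&ret;
  }
  // Older targets: fall back to the generic compiler atomic.
  int atomic_scope = 0, memory_order = 0;
  GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order)
  float ret = __hip_atomic_fetch_add(p, val, memory_order, atomic_scope);
  return *(float *)&ret;
}
```

Since `__oclc_ISA_version` is supplied as a constant by the ROCm device libraries, the branch is expected to fold away once the library is linked for a concrete target.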

- #define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2) \
+ #define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2, CHECK, \
+                                     NEW_BUILTIN) \
  _CLC_DEF long \
      _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \
          volatile AS long *, enum Scope, enum MemorySemanticsMask, \
@@ -51,6 +52,10 @@ AMDGPU_ATOMIC_FP32_IMPL(global, U3AS1, 1)
      _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \
          volatile AS double *p, enum Scope scope, \
          enum MemorySemanticsMask semantics, double val) { \
+   if (CHECK) { \
+     double ret = NEW_BUILTIN(p, val); \
+     return *(double *)&ret; \
+   } \
    int atomic_scope = 0, memory_order = 0; \
    volatile AS long *int_pointer = (volatile AS long *)p; \
    long old_int_val = 0, new_int_val = 0; \
@@ -69,12 +74,16 @@ AMDGPU_ATOMIC_FP32_IMPL(global, U3AS1, 1)
}

#ifdef cl_khr_int64_base_atomics
- AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5)
- AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5)
- AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4)
+ AMDGPU_ATOMIC_FP64_ADD_IMPL(global, U3AS1, 1, 5, __oclc_ISA_version >= 9010,
+                             __builtin_amdgcn_global_atomic_fadd_f64)
+ AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5, __oclc_ISA_version >= 9010,
+                             __builtin_amdgcn_ds_atomic_fadd_f64)
+ AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4, __oclc_ISA_version >= 9400,
+                             __builtin_amdgcn_flat_atomic_fadd_f64)
#endif
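The fallback path of `AMDGPU_ATOMIC_FP64_ADD_IMPL` is collapsed out of this hunk, but the `int_pointer` / `old_int_val` / `new_int_val` locals it sets up point at a compare-and-swap loop over the 64-bit representation. A minimal generic sketch of that technique, not the file's exact code (the function name is hypothetical, and it uses the standard `atom_cmpxchg` from `cl_khr_int64_base_atomics` rather than the mangled `__spirv_AtomicCompareExchange` declared above):

```c
// Generic sketch of a CAS-based double fetch-add; illustrative only.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

double atomic_fadd_f64_cas_sketch(volatile global double *p, double val) {
  volatile global long *int_pointer = (volatile global long *)p;
  long old_int_val, new_int_val;
  do {
    old_int_val = *int_pointer;                            // snapshot the current bits
    new_int_val = as_long(as_double(old_int_val) + val);   // bits of old value + val
    // Retry if another thread changed the word between the load and the CAS.
  } while (atom_cmpxchg(int_pointer, old_int_val, new_int_val) != old_int_val);
  return as_double(old_int_val); // fetch-add returns the previous value
}
```

The real implementation additionally maps the SPIR-V scope and memory-semantics arguments, which this sketch omits.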

#undef AMDGPU_ATOMIC
#undef AMDGPU_ATOMIC_IMPL
+ #undef AMDGPU_ATOMIC_FP32_ADD_IMPL
#undef AMDGPU_ATOMIC_FP64_ADD_IMPL
#undef GET_ATOMIC_SCOPE_AND_ORDER