@@ -17,7 +17,98 @@ __CLC_NVVM_ATOMIC(ulong, m, long, l, add, _Z18__spirv_AtomicIAdd)
17
17
18
18
__CLC_NVVM_ATOMIC (float , f , float , f , add , _Z21__spirv_AtomicFAddEXT )
19
19
#ifdef cl_khr_int64_base_atomics
20
- __CLC_NVVM_ATOMIC (double , d , double , d , add , _Z21__spirv_AtomicFAddEXT )
20
+
21
/*
 * __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_MANGLED,
 *                                   ADDR_SPACE_NV, SUBSTITUTION1,
 *                                   SUBSTITUTION2)
 *
 * Defines the double-precision overload of __spirv_AtomicFAddEXT for one
 * address space.
 *
 *   ADDR_SPACE          address-space qualifier applied to the pointer types
 *                       (may be empty).
 *   ADDR_SPACE_MANGLED  text pasted after the 'P' in the mangled symbol names
 *                       (may be empty).
 *   ADDR_SPACE_NV       forwarded to the __CLC_NVVM_ATOMIC_IMPL_* helpers.
 *   SUBSTITUTION1/2     digits pasted into the "NS<n>_" / "S<n>_" parts of
 *                       the mangled __spirv_AtomicCompareExchange name.
 *
 * The expansion first declares the 64-bit integer __spirv_AtomicLoad and
 * __spirv_AtomicCompareExchange overloads it calls, then defines the function:
 *
 *   - __clc_nvvm_reflect_arch() >= 600: dispatches on the memory order through
 *     __CLC_NVVM_ATOMIC_IMPL_ORDER / __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE.
 *     Those helpers are defined outside this block; presumably each expands to
 *     code that returns from the function -- confirm. Any order that is not
 *     None, Acquire, Release or AcquireRelease reaches __builtin_trap().
 *
 *   - otherwise: emulates the addition with a load / compare-exchange loop on
 *     the value's 64-bit integer representation and returns the value observed
 *     before the successful exchange.
 */
#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_MANGLED, \
                                          ADDR_SPACE_NV, SUBSTITUTION1, \
                                          SUBSTITUTION2) \
  long \
      _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
          volatile ADDR_SPACE const long *, enum Scope, \
          enum MemorySemanticsMask); \
  long \
      _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \
          volatile ADDR_SPACE long *, enum Scope, enum MemorySemanticsMask, \
          enum MemorySemanticsMask, long, long); \
  __attribute__((always_inline)) _CLC_DECL double \
      _Z21__spirv_AtomicFAddEXT##P##ADDR_SPACE_MANGLED##d##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##d( \
          volatile ADDR_SPACE double *pointer, enum Scope scope, \
          enum MemorySemanticsMask semantics, double value) { \
    /* Semantics mask may include memory order, storage class and other info. \
       Memory order is stored in the lowest 5 bits. */ \
    unsigned int order = semantics & 0x1F; \
    if (__clc_nvvm_reflect_arch() >= 600) { \
      switch (order) { \
      case None: \
        __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
                                     ADDR_SPACE_NV, ) \
        break; \
      case Acquire: \
        if (__clc_nvvm_reflect_arch() >= 700) { \
          __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
                                       ADDR_SPACE_NV, _acquire) \
        } else { \
          __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \
                                               ADDR_SPACE, ADDR_SPACE_NV) \
        } \
        break; \
      case Release: \
        if (__clc_nvvm_reflect_arch() >= 700) { \
          __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
                                       ADDR_SPACE_NV, _release) \
        } else { \
          /* Below 700: release barrier first, then the unordered form. */ \
          __spirv_MemoryBarrier(scope, Release); \
          __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
                                       ADDR_SPACE_NV, ) \
        } \
        break; \
      case AcquireRelease: \
        if (__clc_nvvm_reflect_arch() >= 700) { \
          __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
                                       ADDR_SPACE_NV, _acq_rel) \
        } else { \
          /* Below 700: release barrier first, then the acquire-fence form. */ \
          __spirv_MemoryBarrier(scope, Release); \
          __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \
                                               ADDR_SPACE, ADDR_SPACE_NV) \
        } \
        break; \
      } \
      /* Reached only for a memory order with no case above. */ \
      __builtin_trap(); \
      __builtin_unreachable(); \
    } else { \
      /* Pick the ordering for the plain load from the masked memory order, \
         so that extra bits in the semantics mask do not silently select \
         the default. */ \
      enum MemorySemanticsMask load_order; \
      switch (order) { \
      case SequentiallyConsistent: \
        load_order = SequentiallyConsistent; \
        break; \
      case Acquire: \
      case AcquireRelease: \
        load_order = Acquire; \
        break; \
      default: \
        load_order = None; \
        break; \
      } \
      volatile ADDR_SPACE long *pointer_int = \
          (volatile ADDR_SPACE long *)pointer; \
      long old_int; \
      long new_val_int; \
      /* Retry until the compare-exchange reports the same 64-bit pattern \
         that was loaded, i.e. no other update slipped in between. */ \
      do { \
        old_int = \
            _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
                pointer_int, scope, load_order); \
        double new_val = *(double *)&old_int + value; \
        new_val_int = *(long *)&new_val; \
      } while ( \
          _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \
              pointer_int, scope, semantics, semantics, new_val_int, \
              old_int) != old_int); \
      /* Value held at *pointer immediately before the successful update. */ \
      return *(double *)&old_int; \
    } \
  }
107
+
108
/* Instantiate the double-precision __spirv_AtomicFAddEXT overload three times:
   with no address-space qualifier (NV suffix _gen_), for __global (mangled
   U3AS1, NV suffix _global_) and for __local (mangled U3AS3, NV suffix
   _shared_). The trailing digit pairs are pasted into the mangled
   __spirv_AtomicCompareExchange symbol name; presumably they are the
   name-mangling substitution indices, which differ once an address-space
   qualifier is present -- TODO confirm against the declared overloads. */
+ __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL (, , _gen_ , 0 , 4 )
109
+ __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL (__global , U3AS1 , _global_ , 1 , 5 )
110
+ __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL (__local , U3AS3 , _shared_ , 1 , 5 )
111
+
21
112
#endif
22
113
23
114
#undef __CLC_NVVM_ATOMIC_TYPES
0 commit comments