@@ -16,126 +16,205 @@ namespace __ESIMD_DNS {
16
16
// This function implements atomic update of pre-existing variable in the
17
17
// absence of C++20's atomic_ref.
18
18
19
- template <typename Ty> Ty atomic_load (Ty *ptr) {
19
+ // The __atomic_*_n and __atomic_fetch_* builtins accept only integral (or
20
+ // pointer) operands. To support floating-point types for operations such
21
+ // as min/max, the update is done with a 'cmpxchg' on the value's bit
22
+ // pattern, held in a 'bridging' variable of integral type CmpxchgTy<Ty>.
23
+ template <typename Ty> using CmpxchgTy = __ESIMD_DNS::uint_type_t <sizeof (Ty)>;
24
+
25
+ template <typename Ty> inline Ty atomic_load (Ty *ptr) {
20
26
#ifdef _WIN32
21
27
// TODO: Windows will be supported soon
22
28
__ESIMD_UNSUPPORTED_ON_HOST;
23
29
#else
24
- return __atomic_load (ptr, __ATOMIC_SEQ_CST);
30
+ __ESIMD_UNSUPPORTED_ON_HOST;
31
+ // TODO : Enable with unit test
32
+ /* return sycl::bit_cast<Ty>(__atomic_load_n((CmpxchgTy<Ty> *)ptr,
33
+ __ATOMIC_SEQ_CST)); */
25
34
#endif
26
35
}
27
36
28
/// Atomically replaces the value stored at @p ptr with @p val.
///
/// The read of the previous value and the write of the new one happen as a
/// single sequentially-consistent exchange, so no concurrent update can slip
/// in between them. The generic (non-_n) __atomic_exchange builtin is used
/// because it copies the object representation and therefore works for
/// floating-point types as well as integral ones.
///
/// @tparam Ty trivially copyable type of the object being updated.
/// @param ptr address of the object to overwrite.
/// @param val value to store.
/// @return the value held at @p ptr immediately before the store.
template <typename Ty> inline Ty atomic_store(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty previous;
  __atomic_exchange(ptr, &val, &previous, __ATOMIC_SEQ_CST);
  return previous;
#endif
}
36
47
37
/// Atomically adds @p val to the object at @p ptr (fetch-then-add).
///
/// Integral types map directly onto __atomic_fetch_add. Other types (e.g.
/// floating point) have no fetch-add builtin, so the sum is computed on a
/// snapshot and published with a compare-exchange loop that retries until no
/// other thread modified the object in between.
///
/// @tparam Ty integral type, or a trivially copyable type supporting '+'.
/// @param ptr address of the object to update.
/// @param val addend.
/// @return the value held at @p ptr immediately before the addition.
template <typename Ty> inline Ty atomic_add(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  if constexpr (std::is_integral_v<Ty>) {
    return __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST);
  } else {
    // Take the initial snapshot with an atomic load: a plain '*ptr' read
    // would race with concurrent writers.
    Ty expected;
    __atomic_load(ptr, &expected, __ATOMIC_SEQ_CST);
    Ty desired = expected + val;
    // The generic builtin compares object representations (bitwise), and on
    // failure refreshes 'expected' with the current contents of *ptr.
    while (!__atomic_compare_exchange(ptr, &expected, &desired, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
      desired = expected + val;
    }
    return expected;
  }
#endif
}
45
71
46
/// Atomically subtracts @p val from the object at @p ptr (fetch-then-sub).
///
/// Integral types map directly onto __atomic_fetch_sub. Other types (e.g.
/// floating point) have no fetch-sub builtin, so the difference is computed
/// on a snapshot and published with a compare-exchange loop that retries
/// until no other thread modified the object in between.
///
/// @tparam Ty integral type, or a trivially copyable type supporting '-'.
/// @param ptr address of the object to update.
/// @param val subtrahend.
/// @return the value held at @p ptr immediately before the subtraction.
template <typename Ty> inline Ty atomic_sub(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  if constexpr (std::is_integral_v<Ty>) {
    return __atomic_fetch_sub(ptr, val, __ATOMIC_SEQ_CST);
  } else {
    // Take the initial snapshot with an atomic load: a plain '*ptr' read
    // would race with concurrent writers.
    Ty expected;
    __atomic_load(ptr, &expected, __ATOMIC_SEQ_CST);
    Ty desired = expected - val;
    // The generic builtin compares object representations (bitwise), and on
    // failure refreshes 'expected' with the current contents of *ptr.
    while (!__atomic_compare_exchange(ptr, &expected, &desired, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
      desired = expected - val;
    }
    return expected;
  }
#endif
}
54
95
55
/// Atomically performs a bitwise AND of @p val into the object at @p ptr.
///
/// The update is a single sequentially-consistent __atomic_fetch_and.
///
/// @tparam Ty integral type (enforced at compile time on non-Windows hosts).
/// @param ptr address of the object to update.
/// @param val mask to AND into the object.
/// @return the value held at @p ptr immediately before the update.
template <typename Ty> inline Ty atomic_and(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_and is defined only for integral types");
  return __atomic_fetch_and(ptr, val, __ATOMIC_SEQ_CST);
#endif
}
63
105
64
/// Atomically performs a bitwise OR of @p val into the object at @p ptr.
///
/// The update is a single sequentially-consistent __atomic_fetch_or.
///
/// @tparam Ty integral type (enforced at compile time on non-Windows hosts).
/// @param ptr address of the object to update.
/// @param val bits to OR into the object.
/// @return the value held at @p ptr immediately before the update.
template <typename Ty> inline Ty atomic_or(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_or is defined only for integral types");
  return __atomic_fetch_or(ptr, val, __ATOMIC_SEQ_CST);
#endif
}
72
115
73
/// Atomically performs a bitwise XOR of @p val into the object at @p ptr.
///
/// The update is a single sequentially-consistent __atomic_fetch_xor.
///
/// @tparam Ty integral type (enforced at compile time on non-Windows hosts).
/// @param ptr address of the object to update.
/// @param val bits to XOR into the object.
/// @return the value held at @p ptr immediately before the update.
template <typename Ty> inline Ty atomic_xor(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_xor is defined only for integral types");
  return __atomic_fetch_xor(ptr, val, __ATOMIC_SEQ_CST);
#endif
}
81
125
82
/// Atomically stores min(*ptr, val) into the object at @p ptr.
///
/// There is no fetch-min builtin, so the minimum is computed on a snapshot
/// and published with a compare-exchange loop that retries until no other
/// thread modified the object in between. The generic (non-_n) builtins are
/// used; they operate on the object representation, so the same code serves
/// integral and floating-point types.
///
/// @tparam Ty trivially copyable type usable with std::min.
/// @param ptr address of the object to update.
/// @param val candidate minimum.
/// @return the value held at @p ptr immediately before the update.
template <typename Ty> inline Ty atomic_min(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  // Take the initial snapshot with an atomic load: a plain '*ptr' read would
  // race with concurrent writers.
  Ty expected;
  __atomic_load(ptr, &expected, __ATOMIC_SEQ_CST);
  Ty desired = std::min<Ty>(expected, val);
  // On failure the builtin refreshes 'expected' with the current contents of
  // *ptr, so only 'desired' has to be recomputed.
  while (!__atomic_compare_exchange(ptr, &expected, &desired, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    desired = std::min<Ty>(expected, val);
  }
  return expected;
#endif
}
101
154
102
/// Atomically stores max(*ptr, val) into the object at @p ptr.
///
/// There is no fetch-max builtin, so the maximum is computed on a snapshot
/// and published with a compare-exchange loop that retries until no other
/// thread modified the object in between. The generic (non-_n) builtins are
/// used; they operate on the object representation, so the same code serves
/// integral and floating-point types.
///
/// @tparam Ty trivially copyable type usable with std::max.
/// @param ptr address of the object to update.
/// @param val candidate maximum.
/// @return the value held at @p ptr immediately before the update.
template <typename Ty> inline Ty atomic_max(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  // Take the initial snapshot with an atomic load: a plain '*ptr' read would
  // race with concurrent writers.
  Ty expected;
  __atomic_load(ptr, &expected, __ATOMIC_SEQ_CST);
  Ty desired = std::max<Ty>(expected, val);
  // On failure the builtin refreshes 'expected' with the current contents of
  // *ptr, so only 'desired' has to be recomputed.
  while (!__atomic_compare_exchange(ptr, &expected, &desired, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    desired = std::max<Ty>(expected, val);
  }
  return expected;
#endif
}
121
183
122
/// Atomic strong compare-and-exchange on the object at @p ptr.
///
/// If the object currently equals @p expected it is replaced by @p desired;
/// otherwise it is left untouched. The comparison is performed on the object
/// representation (bitwise), so for floating-point types +0.0 and -0.0 are
/// treated as different values and a NaN matches only a NaN with identical
/// bits.
///
/// @tparam Ty trivially copyable type of the object being updated.
/// @param ptr address of the object to update.
/// @param expected value the object must hold for the exchange to happen.
/// @param desired value to store when the comparison succeeds.
/// @return the value held at @p ptr immediately before the operation.
template <typename Ty>
inline Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty observed = expected;
  __atomic_compare_exchange(ptr, &observed, &desired, false, __ATOMIC_SEQ_CST,
                            __ATOMIC_SEQ_CST);
  // If the exchange occurred, 'observed' still equals 'expected', which was
  // the old value of *ptr. If it did not occur, the builtin overwrote
  // 'observed' with the current contents of *ptr. Either way 'observed' is
  // the old value.
  return observed;
#endif
}
138
208
209
/// Issues a sequentially-consistent thread fence on non-Windows hosts.
/// On Windows the operation is reported through __ESIMD_UNSUPPORTED_ON_HOST.
inline void atomic_fence() {
#ifndef _WIN32
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#else
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#endif
}
217
+
139
218
} // namespace __ESIMD_DNS
140
219
141
220
/// @endcond ESIMD_DETAIL
0 commit comments