
Commit fdf44d4

Authored by Pennycook, AlexeySachkov, and rolandschulz
[SYCL] Add prototype of ExtendedAtomics features (#1826)
Features:
- atomic_ref with integral and floating-point specializations
- atomic_fence

Tests:
- exchange
- compare_exchange
- fetch_add, +=, ++
- fetch_sub, -=, --
- fetch_min
- fetch_max

Signed-off-by: John Pennycook <[email protected]>
Co-authored-by: Alexey Sachkov <[email protected]>
Co-authored-by: Roland Schulz <[email protected]>
1 parent afd7f4a commit fdf44d4
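As a rough illustration of the intended end-user surface (not code from this commit), the sketch below sums into a buffer element with intel::atomic_ref::fetch_add inside a kernel. The template-parameter order (value type, default memory order, default memory scope, address space) and the reference constructor are assumptions based on the SYCL_INTEL_extended_atomics proposal rather than text taken from this diff.

```cpp
// Hypothetical usage sketch for the ExtendedAtomics prototype.
// Assumed: atomic_ref<T, DefaultOrder, DefaultScope, AddressSpace> and a
// constructor taking a reference, per the extension proposal.
#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
  queue Q;
  int Sum = 0;
  {
    buffer<int, 1> Buf(&Sum, range<1>(1));
    Q.submit([&](handler &CGH) {
      auto Acc = Buf.get_access<access::mode::read_write>(CGH);
      CGH.parallel_for<class add_kernel>(range<1>(1024), [=](id<1>) {
        intel::atomic_ref<int, intel::memory_order::relaxed,
                          intel::memory_scope::device,
                          access::address_space::global_space>
            Ref(Acc[0]);
        Ref.fetch_add(1); // the tests also exercise +=, ++
      });
    });
  } // buffer destruction copies the result back to Sum
  return Sum == 1024 ? 0 : 1;
}
```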

17 files changed: +1712 lines, -2 lines


sycl/doc/extensions/README.md

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ DPC++ extensions status:
 | [SYCL_INTEL_deduction_guides](deduction_guides/SYCL_INTEL_deduction_guides.asciidoc) | Supported | |
 | [SYCL_INTEL_device_specific_kernel_queries](DeviceSpecificKernelQueries/SYCL_INTEL_device_specific_kernel_queries.asciidoc) | Proposal | |
 | [SYCL_INTEL_enqueue_barrier](EnqueueBarrier/enqueue_barrier.asciidoc) | Supported(OpenCL, Level Zero) | |
-| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Proposal | |
+| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Partially supported(OpenCL: CPU, GPU) | Not supported: pointer types |
 | [SYCL_INTEL_group_algorithms](GroupAlgorithms/SYCL_INTEL_group_algorithms.asciidoc) | Supported(OpenCL) | |
 | [SYCL_INTEL_group_mask](./GroupMask/SYCL_INTEL_group_mask.asciidoc) | Proposal | |
 | [FPGA selector](IntelFPGA/FPGASelector.md) | Supported | |

sycl/include/CL/__spirv/spirv_ops.hpp

Lines changed: 1 addition & 0 deletions
@@ -155,6 +155,7 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
   macro(__attribute__((opencl_local)), Arg)

 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
+__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@
 #include <CL/sycl/handler.hpp>
 #include <CL/sycl/id.hpp>
 #include <CL/sycl/image.hpp>
+#include <CL/sycl/intel/atomic.hpp>
 #include <CL/sycl/intel/builtins.hpp>
 #include <CL/sycl/intel/function_pointer.hpp>
 #include <CL/sycl/intel/group_algorithm.hpp>

sycl/include/CL/sycl/detail/defines.hpp

Lines changed: 7 additions & 0 deletions
@@ -46,3 +46,10 @@
 #warning "No assumptions will be emitted due to no __builtin_assume available"
 #endif
 #endif
+
+// inline constexpr is a C++17 feature
+#if __cplusplus >= 201703L
+#define __SYCL_INLINE_CONSTEXPR inline constexpr
+#else
+#define __SYCL_INLINE_CONSTEXPR static constexpr
+#endif
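The new macro selects `inline constexpr` when C++17 is available, so header-defined constants (such as the memory_order_* and memory_scope_* objects added later in this commit) are a single entity across translation units, and it degrades to internal-linkage `static constexpr` otherwise. A standalone sketch of the same pattern, using illustrative names that are not part of the commit:

```cpp
// Illustrative only: mirrors the __SYCL_INLINE_CONSTEXPR pattern outside SYCL.
// Under C++17, `inline constexpr` gives one shared entity for every
// translation unit that includes this header; the pre-C++17 fallback gives
// each translation unit its own internal-linkage copy.
#if __cplusplus >= 201703L
#define MY_INLINE_CONSTEXPR inline constexpr
#else
#define MY_INLINE_CONSTEXPR static constexpr
#endif

enum class color : int { red, green };
MY_INLINE_CONSTEXPR color color_red = color::red;     // header-safe constant
MY_INLINE_CONSTEXPR color color_green = color::green;

int main() { return color_red == color::red ? 0 : 1; }
```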

sycl/include/CL/sycl/detail/spirv.hpp

Lines changed: 222 additions & 1 deletion
@@ -12,6 +12,7 @@
 #include <CL/__spirv/spirv_vars.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
+#include <CL/sycl/intel/atomic_enums.hpp>

 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -28,7 +29,7 @@ template <int Dimensions> struct group_scope<group<Dimensions>> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
 };

-template <> struct group_scope<intel::sub_group> {
+template <> struct group_scope<::cl::sycl::intel::sub_group> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup;
 };

@@ -69,6 +70,226 @@ T GroupBroadcast(T x, id<Dimensions> local_id) {
   return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
 }

+// Single happens-before means semantics should always apply to all spaces
+// Although consume is unsupported, forwarding to acquire is valid
+static inline constexpr __spv::MemorySemanticsMask::Flag
+getMemorySemanticsMask(intel::memory_order Order) {
+  __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None;
+  switch (Order) {
+  case intel::memory_order::relaxed:
+    SpvOrder = __spv::MemorySemanticsMask::None;
+    break;
+  case intel::memory_order::__consume_unsupported:
+  case intel::memory_order::acquire:
+    SpvOrder = __spv::MemorySemanticsMask::Acquire;
+    break;
+  case intel::memory_order::release:
+    SpvOrder = __spv::MemorySemanticsMask::Release;
+    break;
+  case intel::memory_order::acq_rel:
+    SpvOrder = __spv::MemorySemanticsMask::AcquireRelease;
+    break;
+  case intel::memory_order::seq_cst:
+    SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent;
+    break;
+  }
+  return static_cast<__spv::MemorySemanticsMask::Flag>(
+      SpvOrder | __spv::MemorySemanticsMask::SubgroupMemory |
+      __spv::MemorySemanticsMask::WorkgroupMemory |
+      __spv::MemorySemanticsMask::CrossWorkgroupMemory);
+}
+
+static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) {
+  switch (Scope) {
+  case intel::memory_scope::work_item:
+    return __spv::Scope::Invocation;
+  case intel::memory_scope::sub_group:
+    return __spv::Scope::Subgroup;
+  case intel::memory_scope::work_group:
+    return __spv::Scope::Workgroup;
+  case intel::memory_scope::device:
+    return __spv::Scope::Device;
+  case intel::memory_scope::system:
+    return __spv::Scope::CrossDevice;
+  }
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
+                      intel::memory_scope Scope, intel::memory_order Success,
+                      intel::memory_order Failure, T Desired, T Expected) {
+  auto SPIRVSuccess = getMemorySemanticsMask(Success);
+  auto SPIRVFailure = getMemorySemanticsMask(Failure);
+  auto SPIRVScope = getScope(Scope);
+  auto *Ptr = MPtr.get();
+  return __spirv_AtomicCompareExchange(Ptr, SPIRVScope, SPIRVSuccess,
+                                       SPIRVFailure, Desired, Expected);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
+AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
+                      intel::memory_scope Scope, intel::memory_order Success,
+                      intel::memory_order Failure, T Desired, T Expected) {
+  using I = detail::make_unsinged_integer_t<T>;
+  auto SPIRVSuccess = getMemorySemanticsMask(Success);
+  auto SPIRVFailure = getMemorySemanticsMask(Failure);
+  auto SPIRVScope = getScope(Scope);
+  auto *PtrInt =
+      reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
+          MPtr.get());
+  I DesiredInt = detail::bit_cast<I>(Desired);
+  I ExpectedInt = detail::bit_cast<I>(Expected);
+  I ResultInt = __spirv_AtomicCompareExchange(
+      PtrInt, SPIRVScope, SPIRVSuccess, SPIRVFailure, DesiredInt, ExpectedInt);
+  return detail::bit_cast<T>(ResultInt);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+           intel::memory_order Order) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicLoad(Ptr, SPIRVScope, SPIRVOrder);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+           intel::memory_order Order) {
+  using I = detail::make_unsinged_integer_t<T>;
+  auto *PtrInt =
+      reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
+          MPtr.get());
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  I ResultInt = __spirv_AtomicLoad(PtrInt, SPIRVScope, SPIRVOrder);
+  return detail::bit_cast<T>(ResultInt);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value>
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+            intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  __spirv_AtomicStore(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_floating_point<T>::value>
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+            intel::memory_order Order, T Value) {
+  using I = detail::make_unsinged_integer_t<T>;
+  auto *PtrInt =
+      reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
+          MPtr.get());
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  I ValueInt = detail::bit_cast<I>(Value);
+  __spirv_AtomicStore(PtrInt, SPIRVScope, SPIRVOrder, ValueInt);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+               intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicExchange(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+               intel::memory_order Order, T Value) {
+  using I = detail::make_unsinged_integer_t<T>;
+  auto *PtrInt =
+      reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
+          MPtr.get());
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  I ValueInt = detail::bit_cast<I>(Value);
+  I ResultInt =
+      __spirv_AtomicExchange(PtrInt, SPIRVScope, SPIRVOrder, ValueInt);
+  return detail::bit_cast<T>(ResultInt);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+           intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicIAdd(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicISub(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+           intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicISub(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicAnd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+          intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicAnd(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicOr(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+         intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicOr(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicXor(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+          intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicXor(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicMin(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+          intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
+template <typename T, access::address_space AddressSpace>
+inline typename detail::enable_if_t<std::is_integral<T>::value, T>
+AtomicMax(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
+          intel::memory_order Order, T Value) {
+  auto *Ptr = MPtr.get();
+  auto SPIRVOrder = getMemorySemanticsMask(Order);
+  auto SPIRVScope = getScope(Scope);
+  return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value);
+}
+
 } // namespace spirv
 } // namespace detail
 } // namespace sycl
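The floating-point overloads above route through the integer SPIR-V atomics by reinterpreting the value's storage as a same-sized unsigned integer and casting the bits back afterwards. A host-only analogue of that round trip, using std::atomic and a memcpy-based bit cast (illustrative, not the library's code path):

```cpp
// Illustrative host-side analogue of the float-via-integer atomic exchange:
// store the float's bit pattern in a same-sized unsigned integer, let the
// integer atomic do the work, then cast the result back to float.
#include <atomic>
#include <cstdint>
#include <cstring>

template <typename To, typename From>
To bit_cast_impl(From from) { // stand-in for detail::bit_cast
  static_assert(sizeof(To) == sizeof(From), "size mismatch");
  To to;
  std::memcpy(&to, &from, sizeof(To));
  return to;
}

float atomic_exchange_float(std::atomic<std::uint32_t> &storage, float value) {
  std::uint32_t value_int = bit_cast_impl<std::uint32_t>(value);
  std::uint32_t result_int =
      storage.exchange(value_int, std::memory_order_seq_cst);
  return bit_cast_impl<float>(result_int);
}
```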

sycl/include/CL/sycl/intel/atomic.hpp

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+//==---------------- atomic.hpp - SYCL_INTEL_extended_atomics --------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/intel/atomic_fence.hpp>
+#include <CL/sycl/intel/atomic_ref.hpp>
sycl/include/CL/sycl/intel/atomic_enums.hpp

Lines changed: 103 additions & 0 deletions

@@ -0,0 +1,103 @@
+//==---------------- atomic_enums.hpp - SYCL_INTEL_extended_atomics enums --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/access/access.hpp>
+#include <CL/sycl/detail/defines.hpp>
+#include <CL/sycl/detail/helpers.hpp>
+
+#ifndef __SYCL_DEVICE_ONLY__
+#include <atomic>
+#endif
+#include <type_traits>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace intel {
+
+enum class memory_order : int {
+  relaxed,
+  acquire,
+  __consume_unsupported, // helps optimizer when mapping to std::memory_order
+  release,
+  acq_rel,
+  seq_cst
+};
+__SYCL_INLINE_CONSTEXPR memory_order memory_order_relaxed =
+    memory_order::relaxed;
+__SYCL_INLINE_CONSTEXPR memory_order memory_order_acquire =
+    memory_order::acquire;
+__SYCL_INLINE_CONSTEXPR memory_order memory_order_release =
+    memory_order::release;
+__SYCL_INLINE_CONSTEXPR memory_order memory_order_acq_rel =
+    memory_order::acq_rel;
+__SYCL_INLINE_CONSTEXPR memory_order memory_order_seq_cst =
+    memory_order::seq_cst;
+
+enum class memory_scope : int {
+  work_item,
+  sub_group,
+  work_group,
+  device,
+  system
+};
+__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_work_item =
+    memory_scope::work_item;
+__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_sub_group =
+    memory_scope::sub_group;
+__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_work_group =
+    memory_scope::work_group;
+__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_device = memory_scope::device;
+__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_system = memory_scope::system;
+
+#ifndef __SYCL_DEVICE_ONLY__
+namespace detail {
+// Cannot use switch statement in constexpr before C++14
+// Nested ternary conditions in else branch required for C++11
+#if __cplusplus >= 201402L
+static inline constexpr std::memory_order
+getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+  switch (order) {
+  case memory_order::relaxed:
+    return std::memory_order_relaxed;
+  case memory_order::__consume_unsupported:
+    return std::memory_order_consume;
+  case memory_order::acquire:
+    return std::memory_order_acquire;
+  case memory_order::release:
+    return std::memory_order_release;
+  case memory_order::acq_rel:
+    return std::memory_order_acq_rel;
+  case memory_order::seq_cst:
+    return std::memory_order_seq_cst;
+  }
+}
+#else
+static inline constexpr std::memory_order
+getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+  return (order == memory_order::relaxed)
+             ? std::memory_order_relaxed
+             : (order == memory_order::__consume_unsupported)
+                   ? std::memory_order_consume
+                   : (order == memory_order::acquire)
+                         ? std::memory_order_acquire
+                         : (order == memory_order::release)
+                               ? std::memory_order_release
+                               : (order == memory_order::acq_rel)
+                                     ? std::memory_order_acq_rel
+                                     : std::memory_order_seq_cst;
+}
+#endif // __cplusplus
+} // namespace detail
+#endif // __SYCL_DEVICE_ONLY__
+
+} // namespace intel
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
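On the host path, intel::memory_order is translated to std::memory_order through getStdMemoryOrder so the fallback can reuse std::atomic. The standalone sketch below mirrors that mapping with a renamed enum (consume_unsupported stands in for the library's __consume_unsupported enumerator) and drives standard atomics with it; it is an illustration, not the library's implementation.

```cpp
// Illustrative host-side analogue of getStdMemoryOrder: consume is forwarded
// to std::memory_order_consume, and relaxed maps to no ordering constraint.
#include <atomic>
#include <cassert>

enum class my_memory_order : int {
  relaxed, acquire, consume_unsupported, release, acq_rel, seq_cst
};

constexpr std::memory_order to_std(my_memory_order order) {
  switch (order) {
  case my_memory_order::relaxed: return std::memory_order_relaxed;
  case my_memory_order::consume_unsupported: return std::memory_order_consume;
  case my_memory_order::acquire: return std::memory_order_acquire;
  case my_memory_order::release: return std::memory_order_release;
  case my_memory_order::acq_rel: return std::memory_order_acq_rel;
  case my_memory_order::seq_cst: return std::memory_order_seq_cst;
  }
  return std::memory_order_seq_cst; // unreachable; silences -Wreturn-type
}

int main() {
  std::atomic<int> Flag{0};
  Flag.store(1, to_std(my_memory_order::release));
  assert(Flag.load(to_std(my_memory_order::acquire)) == 1);
  std::atomic_thread_fence(to_std(my_memory_order::seq_cst));
  return 0;
}
```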
