Skip to content

Commit 4968e7c

Browse files
authored
[SYCL] Add atomic_accessor extension (#2069)
Updates the SYCL_INTEL_extended_atomics extension to describe atomic_accessor, and adds an initial implementation.
1 parent f7cc622 commit 4968e7c

File tree

5 files changed

+296
-2
lines changed

5 files changed

+296
-2
lines changed

sycl/doc/extensions/ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,13 +331,43 @@ The effects of a call to +atomic_fence+ depend on the value of the +order+ parameter:
 - `acq_rel`: Both an acquire fence and a release fence
 - `seq_cst`: A sequentially consistent acquire and release fence
 
+==== Atomic Accessor
+
+The +atomic_accessor+ class replaces accessors created with +access::mode::atomic+. All operators of +atomic_accessor+ which provide access to an element of the underlying memory do so by wrapping the element in an +atomic_ref+. In order to guarantee that all accesses to the underlying memory are atomic, an +atomic_accessor+ does not allow direct access to the memory via +get_pointer()+.
+
+All other member functions are as defined in the +accessor+ class.
+
+|===
+| Member Function | Description
+
+| `operator atomic_ref<DataT, DefaultOrder, DefaultScope, access::address_space::global_space>() const`
+| Available only when: `Dimensions == 0`. Returns an `atomic_ref` associated with the element stored in the underlying `buffer` or work-group local memory.
+
+| `atomic_ref<DataT, DefaultOrder, DefaultScope, access::address_space::global_space> operator[](id<Dimensions> index) const`
+| Available only when: `Dimensions > 0`. Returns an `atomic_ref` associated with the element stored at the specified _index_ in the underlying `buffer` or work-group local memory.
+
+| `atomic_ref<DataT, DefaultOrder, DefaultScope, access::address_space::global_space> operator[](size_t index) const`
+| Available only when: `Dimensions == 1`. Returns an `atomic_ref` associated with the element stored at the specified _index_ in the underlying `buffer` or work-group local memory.
+
+| `global_ptr<DataT> get_pointer() const = delete`
+| Direct access to the underlying `buffer` or work-group local memory is not permitted.
+
+|===
+
+To simplify the construction of an +atomic_accessor+, tag objects of type +order_tag_t+ and +scope_tag_t+ may optionally be passed to the constructor. These tag objects enable the `DefaultOrder` and `DefaultScope` template arguments to be deduced via CTAD, as shown in the example below:
+
+[source,c++]
+----
+auto acc = atomic_accessor(buf, h, relaxed_order, device_scope);
+----
+
 ==== Sample Header
 
 [source,c++]
 ----
 namespace cl {
 namespace sycl {
 namespace intel {
+
 enum class memory_order : /* unspecified */ {
   relaxed, acquire, release, acq_rel, seq_cst
 };
@@ -356,6 +386,24 @@ inline constexpr memory_scope memory_scope_work_group = memory_scope::work_group
 inline constexpr memory_scope memory_scope_device = memory_scope::device;
 inline constexpr memory_scope memory_scope_system = memory_scope::system;
 
+template <memory_order> struct order_tag_t {
+  explicit order_tag_t() = default;
+};
+inline constexpr order_tag_t<memory_order::relaxed> relaxed_order{};
+inline constexpr order_tag_t<memory_order::acquire> acquire_order{};
+inline constexpr order_tag_t<memory_order::release> release_order{};
+inline constexpr order_tag_t<memory_order::acq_rel> acq_rel_order{};
+inline constexpr order_tag_t<memory_order::seq_cst> seq_cst_order{};
+
+template <memory_scope> struct scope_tag_t {
+  explicit scope_tag_t() = default;
+};
+inline constexpr scope_tag_t<memory_scope::work_item> work_item_scope{};
+inline constexpr scope_tag_t<memory_scope::sub_group> sub_group_scope{};
+inline constexpr scope_tag_t<memory_scope::work_group> work_group_scope{};
+inline constexpr scope_tag_t<memory_scope::device> device_scope{};
+inline constexpr scope_tag_t<memory_scope::system> system_scope{};
+
 // Exposition only
 template <memory_order ReadModifyWriteOrder>
 struct memory_order_traits;
@@ -578,6 +626,12 @@ class atomic_ref<T*, DefaultOrder, DefaultScope, Space> {
 
 void atomic_fence(memory_order order, memory_scope scope):
 
+template <typename DataT, int Dimensions,
+          memory_order DefaultOrder, memory_scope DefaultScope,
+          access::target AccessTarget = access::target::global_buffer,
+          access::placeholder IsPlaceholder = access::placeholder::false_t>
+class atomic_accessor;
+
 } // namespace intel
 } // namespace sycl
 } // namespace cl
@@ -605,6 +659,7 @@ None.
 |3|2020-04-09|John Pennycook|*Add atomic_fence*
 |4|2020-04-24|John Pennycook|*Add memory scope*
 |5|2020-04-29|John Pennycook|*Fix ambiguous overloads of compare_exchange and typo in fetch_sub*
+|6|2020-07-08|John Pennycook|*Add atomic_accessor*
 |========================================
 
 //************************************************************************

sycl/include/CL/sycl/accessor.hpp — 2 additions, 2 deletions
(members used by atomic_accessor are widened from private to protected)

@@ -1359,7 +1359,7 @@ class accessor<DataT, Dimensions, AccessMode, access::target::local,
 #endif
       public detail::accessor_common<DataT, Dimensions, AccessMode,
                                      access::target::local, IsPlaceholder> {
-
+protected:
   constexpr static int AdjustedDim = Dimensions == 0 ? 1 : Dimensions;
 
   using AccessorCommonT =
@@ -1395,7 +1395,7 @@ class accessor<DataT, Dimensions, AccessMode, access::target::local,
   accessor()
       : impl(detail::InitializedVal<AdjustedDim, range>::template get<0>()) {}
 
-private:
+protected:
   ConcreteASPtrType getQualifiedPtr() const { return MData; }
 
   ConcreteASPtrType MData;

sycl/include/CL/sycl/intel/atomic.hpp — 1 addition

@@ -8,6 +8,7 @@
 
 #pragma once
 
+#include <CL/sycl/intel/atomic_accessor.hpp>
 #include <CL/sycl/intel/atomic_enums.hpp>
 #include <CL/sycl/intel/atomic_fence.hpp>
 #include <CL/sycl/intel/atomic_ref.hpp>
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
//==--- atomic_accessor.hpp - SYCL_INTEL_extended_atomics atomic_accessor --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/access/access.hpp>
#include <CL/sycl/intel/atomic_enums.hpp>
#include <CL/sycl/intel/atomic_ref.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace intel {

// The tag types below require inline constexpr variables and CTAD, so they
// are only available in C++17 and later.
#if __cplusplus > 201402L

// Tag type carrying a memory_order; passing one of the named tag objects to
// an atomic_accessor constructor lets CTAD deduce the DefaultOrder argument.
template <memory_order> struct order_tag_t {
  explicit order_tag_t() = default;
};
inline constexpr order_tag_t<memory_order::relaxed> relaxed_order{};
inline constexpr order_tag_t<memory_order::acquire> acquire_order{};
inline constexpr order_tag_t<memory_order::release> release_order{};
inline constexpr order_tag_t<memory_order::acq_rel> acq_rel_order{};
inline constexpr order_tag_t<memory_order::seq_cst> seq_cst_order{};

// Tag type carrying a memory_scope; passing one of the named tag objects to
// an atomic_accessor constructor lets CTAD deduce the DefaultScope argument.
template <memory_scope> struct scope_tag_t {
  explicit scope_tag_t() = default;
};
inline constexpr scope_tag_t<memory_scope::work_item> work_item_scope{};
inline constexpr scope_tag_t<memory_scope::sub_group> sub_group_scope{};
inline constexpr scope_tag_t<memory_scope::work_group> work_group_scope{};
inline constexpr scope_tag_t<memory_scope::device> device_scope{};
inline constexpr scope_tag_t<memory_scope::system> system_scope{};

#endif

/// Accessor whose element accesses are all performed atomically, by wrapping
/// each accessed element in an atomic_ref with the given DefaultOrder and
/// DefaultScope. Replaces accessors created with access::mode::atomic.
template <typename DataT, int Dimensions, memory_order DefaultOrder,
          memory_scope DefaultScope,
          access::target AccessTarget = access::target::global_buffer,
          access::placeholder IsPlaceholder = access::placeholder::false_t>
class atomic_accessor
    : public accessor<DataT, Dimensions, access::mode::read_write, AccessTarget,
                      IsPlaceholder> {

  using AccessorT = accessor<DataT, Dimensions, access::mode::read_write,
                             AccessTarget, IsPlaceholder>;

private:
  using AccessorT::getLinearIndex;
  using AccessorT::getQualifiedPtr;

  // Prevent non-atomic access to atomic accessor: deleting get_pointer()
  // guarantees that every access to the underlying memory goes through an
  // atomic_ref.
  multi_ptr<DataT, AccessorT::AS> get_pointer() const = delete;

protected:
  using AccessorT::AdjustedDim;

public:
  using value_type = DataT;
  using reference =
      atomic_ref<DataT, DefaultOrder, DefaultScope, AccessorT::AS>;

  // Inherit all base accessor constructors.
  using AccessorT::AccessorT;

#if __cplusplus > 201402L

  // Tag-taking constructors: the order_tag_t/scope_tag_t arguments exist only
  // so CTAD can deduce DefaultOrder/DefaultScope (see the deduction guides
  // below); construction itself delegates to the plain accessor constructors.
  template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
            memory_order Order, memory_scope Scope>
  atomic_accessor(buffer<T, Dims, AllocatorT> &BufferRef, order_tag_t<Order>,
                  scope_tag_t<Scope>, const property_list &PropertyList = {})
      : atomic_accessor(BufferRef, PropertyList) {}

  template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
            memory_order Order, memory_scope Scope>
  atomic_accessor(buffer<T, Dims, AllocatorT> &BufferRef,
                  handler &CommandGroupHandler, order_tag_t<Order>,
                  scope_tag_t<Scope>, const property_list &PropertyList = {})
      : atomic_accessor(BufferRef, CommandGroupHandler, PropertyList) {}

#endif

  // Override subscript operators and conversions to wrap in an atomic_ref.

  // 0-dimensional accessor: implicit conversion to an atomic_ref bound to the
  // single underlying element.
  template <int Dims = Dimensions>
  operator typename detail::enable_if_t<Dims == 0, reference>() const {
    const size_t LinearIndex = getLinearIndex(id<AdjustedDim>());
    return reference(getQualifiedPtr()[LinearIndex]);
  }

  // Multi-dimensional subscript: returns an atomic_ref bound to the element
  // at the given id.
  template <int Dims = Dimensions>
  typename detail::enable_if_t<(Dims > 0), reference>
  operator[](id<Dimensions> Index) const {
    const size_t LinearIndex = getLinearIndex(Index);
    return reference(getQualifiedPtr()[LinearIndex]);
  }

  // 1-dimensional convenience subscript taking a raw index.
  template <int Dims = Dimensions>
  typename detail::enable_if_t<Dims == 1, reference>
  operator[](size_t Index) const {
    const size_t LinearIndex = getLinearIndex(id<AdjustedDim>(Index));
    return reference(getQualifiedPtr()[LinearIndex]);
  }
};

#if __cplusplus > 201402L

// Deduction guides: a buffer without a handler yields a placeholder accessor;
// a buffer plus handler yields a non-placeholder accessor. Order/Scope come
// from the tag objects.
template <typename DataT, int Dimensions, typename AllocatorT,
          memory_order Order, memory_scope Scope>
atomic_accessor(buffer<DataT, Dimensions, AllocatorT>, order_tag_t<Order>,
                scope_tag_t<Scope>, property_list = {})
    ->atomic_accessor<DataT, Dimensions, Order, Scope, target::global_buffer,
                      access::placeholder::true_t>;

template <typename DataT, int Dimensions, typename AllocatorT,
          memory_order Order, memory_scope Scope>
atomic_accessor(buffer<DataT, Dimensions, AllocatorT>, handler,
                order_tag_t<Order>, scope_tag_t<Scope>, property_list = {})
    ->atomic_accessor<DataT, Dimensions, Order, Scope, target::global_buffer,
                      access::placeholder::false_t>;

#endif

} // namespace intel
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/atomic_ref/accessor.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <numeric>
#include <vector>
using namespace sycl;
using namespace sycl::intel;

// Equivalent to add_test from add.cpp.
// Uses atomic_accessor instead of atomic_ref: every work-item atomically
// increments a single counter and records the value it observed.
template <typename T> void accessor_test(queue q, size_t N) {
  T sum = 0;
  std::vector<T> output(N, 0);
  {
    buffer<T> sum_buf(&sum, 1);
    buffer<T> output_buf(output.data(), output.size());

    q.submit([&](handler &cgh) {
#if __cplusplus > 201402L
      // CTAD with tag objects must deduce the same type as the explicit form.
      static_assert(
          std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
                                                device_scope)),
                       atomic_accessor<T, 1, intel::memory_order::relaxed,
                                       intel::memory_scope::device>>::value,
          "atomic_accessor type incorrectly deduced");
#endif
      auto sum = atomic_accessor<T, 1, intel::memory_order::relaxed,
                                 intel::memory_scope::device>(sum_buf, cgh);
      auto out =
          output_buf.template get_access<access::mode::discard_write>(cgh);
      cgh.parallel_for(range<1>(N), [=](item<1> it) {
        int gid = it.get_id(0);
        // Subscripting the accessor must yield an atomic_ref in the global
        // address space with the accessor's order and scope.
        static_assert(
            std::is_same<
                decltype(sum[0]),
                atomic_ref<T, intel::memory_order::relaxed,
                           intel::memory_scope::device,
                           access::address_space::global_space>>::value,
            "atomic_accessor returns incorrect atomic_ref");
        out[gid] = sum[0].fetch_add(T(1));
      });
    });
  }

  // All work-items increment by 1, so final value should be equal to N
  assert(sum == N);

  // Intermediate values should be unique
  std::sort(output.begin(), output.end());
  assert(std::unique(output.begin(), output.end()) == output.end());

  // Fetch returns original value: will be in [0, N-1]
  auto min_e = output[0];
  auto max_e = output[output.size() - 1];
  assert(min_e == 0 && max_e == N - 1);
}

// Simplified form of accessor_test for local memory: each work-group of size
// L counts its members; the last arrival writes the group's result out.
template <typename T>
void local_accessor_test(queue q, size_t N, size_t L = 8) {
  assert(N % L == 0);
  std::vector<T> output(N / L, 0);
  {
    buffer<T> output_buf(output.data(), output.size());
    q.submit([&](handler &cgh) {
      auto sum =
          atomic_accessor<T, 1, intel::memory_order::relaxed,
                          intel::memory_scope::device, access::target::local>(
              1, cgh);
      auto out = output_buf.template get_access<access::mode::read_write>(cgh);
      cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
        int grp = it.get_group(0);
        sum[0].store(0);
        it.barrier();
        // Local-target accessors must yield atomic_refs in the local
        // address space.
        static_assert(
            std::is_same<decltype(sum[0]),
                         atomic_ref<T, intel::memory_order::relaxed,
                                    intel::memory_scope::device,
                                    access::address_space::local_space>>::value,
            "local atomic_accessor returns incorrect atomic_ref");
        T result = sum[0].fetch_add(T(1));
        if (result == it.get_local_range(0) - 1) {
          out[grp] = result;
        }
      });
    });
  }

  // All work-items increment by 1, and last in the group writes out old value
  // All values should be L-1
  assert(std::all_of(output.begin(), output.end(),
                     [=](T x) { return x == L - 1; }));
}

int main() {
  queue q;
  constexpr int N = 32;
  accessor_test<int>(q, N);
  // TODO: Enable local accessor test for host when barrier is supported
  if (!q.get_device().is_host()) {
    local_accessor_test<int>(q, N);
  }
  std::cout << "Test passed." << std::endl;
}

0 commit comments

Comments
 (0)