Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 03cb865

Browse files
committed
Merge remote-tracking branch 'upstream/intel' into filter
2 parents 7ccec3b + 704201a commit 03cb865

File tree

291 files changed

+6637
-2457
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

291 files changed

+6637
-2457
lines changed

.github/CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,6 @@ SYCL/USM @jbrodman
7171

7272
# Stream
7373
SYCL/Basic/stream @againull
74+
75+
#BFloat16 conversion
76+
SYCL/BFloat16 @AlexeySotkin @MrSidims

SYCL/AOT/Inputs/aot.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ template <typename T, size_t N>
2020
void simple_vadd(const std::array<T, N> &VA, const std::array<T, N> &VB,
2121
std::array<T, N> &VC) {
2222
cl::sycl::queue deviceQueue([](cl::sycl::exception_list ExceptionList) {
23-
for (cl::sycl::exception_ptr_class ExceptionPtr : ExceptionList) {
23+
for (std::exception_ptr ExceptionPtr : ExceptionList) {
2424
try {
2525
std::rethrow_exception(ExceptionPtr);
2626
} catch (cl::sycl::exception &E) {

SYCL/AtomicRef/accessor.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,21 @@ template <typename T> void accessor_test(queue q, size_t N) {
2626
static_assert(
2727
std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
2828
device_scope)),
29-
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
30-
ONEAPI::memory_scope::device>>::value,
29+
atomic_accessor<T, 1, memory_order::relaxed,
30+
memory_scope::device>>::value,
3131
"atomic_accessor type incorrectly deduced");
3232
#endif
33-
auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
34-
ONEAPI::memory_scope::device>(sum_buf, cgh);
33+
auto sum =
34+
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device>(
35+
sum_buf, cgh);
3536
auto out =
3637
output_buf.template get_access<access::mode::discard_write>(cgh);
3738
cgh.parallel_for(range<1>(N), [=](item<1> it) {
3839
int gid = it.get_id(0);
3940
static_assert(
4041
std::is_same<
4142
decltype(sum[0]),
42-
atomic_ref<T, ONEAPI::memory_order::relaxed,
43-
ONEAPI::memory_scope::device,
43+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
4444
access::address_space::global_space>>::value,
4545
"atomic_accessor returns incorrect atomic_ref");
4646
out[gid] = sum[0].fetch_add(T(1));
@@ -70,19 +70,18 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
7070
buffer<T> output_buf(output.data(), output.size());
7171
q.submit([&](handler &cgh) {
7272
auto sum =
73-
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
74-
ONEAPI::memory_scope::device, access::target::local>(
75-
1, cgh);
73+
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device,
74+
access::target::local>(1, cgh);
7675
auto out = output_buf.template get_access<access::mode::read_write>(cgh);
7776
cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
7877
int grp = it.get_group(0);
7978
sum[0].store(0);
8079
it.barrier();
8180
static_assert(
82-
std::is_same<decltype(sum[0]),
83-
atomic_ref<T, ONEAPI::memory_order::relaxed,
84-
ONEAPI::memory_scope::device,
85-
access::address_space::local_space>>::value,
81+
std::is_same<
82+
decltype(sum[0]),
83+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
84+
access::address_space::local_space>>::value,
8685
"local atomic_accessor returns incorrect atomic_ref");
8786
T result = sum[0].fetch_add(T(1));
8887
if (result == it.get_local_range(0) - 1) {

SYCL/AtomicRef/add.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ void add_fetch_test(queue q, size_t N) {
2828
output_buf.template get_access<access::mode::discard_write>(cgh);
2929
cgh.parallel_for(range<1>(N), [=](item<1> it) {
3030
int gid = it.get_id(0);
31-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32-
ONEAPI::memory_scope::device,
31+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3332
access::address_space::global_space>(sum[0]);
3433
out[gid] = atm.fetch_add(Difference(1));
3534
});
@@ -64,8 +63,7 @@ void add_plus_equal_test(queue q, size_t N) {
6463
output_buf.template get_access<access::mode::discard_write>(cgh);
6564
cgh.parallel_for(range<1>(N), [=](item<1> it) {
6665
int gid = it.get_id(0);
67-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
68-
ONEAPI::memory_scope::device,
66+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
6967
access::address_space::global_space>(sum[0]);
7068
out[gid] = atm += Difference(1);
7169
});
@@ -100,8 +98,7 @@ void add_pre_inc_test(queue q, size_t N) {
10098
output_buf.template get_access<access::mode::discard_write>(cgh);
10199
cgh.parallel_for(range<1>(N), [=](item<1> it) {
102100
int gid = it.get_id(0);
103-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
104-
ONEAPI::memory_scope::device,
101+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
105102
access::address_space::global_space>(sum[0]);
106103
out[gid] = ++atm;
107104
});
@@ -136,8 +133,7 @@ void add_post_inc_test(queue q, size_t N) {
136133
output_buf.template get_access<access::mode::discard_write>(cgh);
137134
cgh.parallel_for(range<1>(N), [=](item<1> it) {
138135
int gid = it.get_id(0);
139-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
140-
ONEAPI::memory_scope::device,
136+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
141137
access::address_space::global_space>(sum[0]);
142138
out[gid] = atm++;
143139
});

SYCL/AtomicRef/assignment.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
6+
#include <CL/sycl.hpp>
7+
#include <algorithm>
8+
#include <cassert>
9+
#include <numeric>
10+
#include <vector>
11+
using namespace sycl;
12+
using namespace sycl::ONEAPI;
13+
14+
template <typename T> class assignment_kernel;
15+
16+
template <typename T> void assignment_test(queue q, size_t N) {
17+
T initial = T(N);
18+
T assignment = initial;
19+
{
20+
buffer<T> assignment_buf(&assignment, 1);
21+
q.submit([&](handler &cgh) {
22+
auto st =
23+
assignment_buf.template get_access<access::mode::read_write>(cgh);
24+
cgh.parallel_for<assignment_kernel<T>>(range<1>(N), [=](item<1> it) {
25+
size_t gid = it.get_id(0);
26+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
27+
ONEAPI::memory_scope::device,
28+
access::address_space::global_space>(st[0]);
29+
atm = T(gid);
30+
});
31+
});
32+
}
33+
34+
// The initial value should have been overwritten by a work-item ID
35+
// Atomicity isn't tested here, but support for assignment() is
36+
assert(assignment != initial);
37+
assert(assignment >= T(0) && assignment <= T(N - 1));
38+
}
39+
40+
int main() {
41+
queue q;
42+
std::string version = q.get_device().get_info<info::device::version>();
43+
44+
constexpr int N = 32;
45+
assignment_test<int>(q, N);
46+
assignment_test<unsigned int>(q, N);
47+
assignment_test<long>(q, N);
48+
assignment_test<unsigned long>(q, N);
49+
assignment_test<long long>(q, N);
50+
assignment_test<unsigned long long>(q, N);
51+
assignment_test<float>(q, N);
52+
assignment_test<double>(q, N);
53+
assignment_test<char *>(q, N);
54+
55+
std::cout << "Test passed." << std::endl;
56+
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
// L0, OpenCL, and ROCm backends don't currently support
7+
// info::device::atomic_memory_order_capabilities and aspect::atomic64
8+
// XFAIL: level_zero || opencl || rocm
9+
10+
// NOTE: Tests load and store for supported memory orderings.
11+
12+
#include <CL/sycl.hpp>
13+
#include <algorithm>
14+
#include <cassert>
15+
#include <numeric>
16+
#include <vector>
17+
using namespace sycl;
18+
using namespace sycl::ONEAPI;
19+
20+
template <typename T, memory_order MO> class memory_order_kernel;
21+
22+
template <typename T> void acq_rel_test(queue q, size_t N) {
23+
T a = 0;
24+
{
25+
buffer<T> a_buf(&a, 1);
26+
27+
q.submit([&](handler &cgh) {
28+
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
29+
cgh.parallel_for<memory_order_kernel<T, memory_order::acq_rel>>(
30+
range<1>(N), [=](item<1> it) {
31+
int gid = it.get_id(0);
32+
auto aar =
33+
atomic_ref<T, memory_order::acq_rel, memory_scope::device,
34+
access::address_space::global_space>(a_acc[0]);
35+
auto ld = aar.load();
36+
ld += 1;
37+
aar.store(ld);
38+
});
39+
});
40+
}
41+
42+
// All work-items increment by 1, so final value should be equal to N
43+
assert(a == T(N));
44+
}
45+
46+
template <typename T> void seq_cst_test(queue q, size_t N) {
47+
T a = 0;
48+
T b = 0;
49+
{
50+
buffer<T> a_buf(&a, 1);
51+
buffer<T> b_buf(&b, 1);
52+
53+
q.submit([&](handler &cgh) {
54+
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
55+
auto b_acc = b_buf.template get_access<access::mode::read_write>(cgh);
56+
cgh.parallel_for<memory_order_kernel<T, memory_order::seq_cst>>(
57+
range<1>(N), [=](item<1> it) {
58+
int gid = it.get_id(0);
59+
auto aar =
60+
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
61+
access::address_space::global_space>(a_acc[0]);
62+
auto bar =
63+
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
64+
access::address_space::global_space>(b_acc[0]);
65+
auto ald = aar.load();
66+
auto bld = bar.load();
67+
ald += 1;
68+
bld += ald;
69+
bar.store(bld);
70+
aar.store(ald);
71+
});
72+
});
73+
}
74+
75+
// All work-items increment a by 1, so final value should be equal to N
76+
assert(a == T(N));
77+
// b is the sum of [1..N]
78+
size_t rsum = 0;
79+
for (size_t i = 1; i <= N; ++i)
80+
rsum += i;
81+
assert(b == T(rsum));
82+
}
83+
84+
bool is_supported(std::vector<memory_order> capabilities,
85+
memory_order mem_order) {
86+
return std::find(capabilities.begin(), capabilities.end(), mem_order) !=
87+
capabilities.end();
88+
}
89+
90+
int main() {
91+
queue q;
92+
93+
std::vector<memory_order> supported_memory_orders =
94+
q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
95+
bool atomic64_support = q.get_device().has(aspect::atomic64);
96+
97+
constexpr int N = 32;
98+
99+
// Relaxed memory order must be supported. This ordering is used in other
100+
// tests.
101+
assert(is_supported(supported_memory_orders, memory_order::relaxed));
102+
103+
if (is_supported(supported_memory_orders, memory_order::acq_rel)) {
104+
// Acquire-release memory order must also support both acquire and release
105+
// orderings.
106+
assert(is_supported(supported_memory_orders, memory_order::acquire) &&
107+
is_supported(supported_memory_orders, memory_order::release));
108+
acq_rel_test<int>(q, N);
109+
acq_rel_test<unsigned int>(q, N);
110+
acq_rel_test<float>(q, N);
111+
if (sizeof(long) == 4) {
112+
// long is 32-bit
113+
acq_rel_test<long>(q, N);
114+
acq_rel_test<unsigned long>(q, N);
115+
}
116+
if (atomic64_support) {
117+
if (sizeof(long) == 8) {
118+
// long is 64-bit
119+
acq_rel_test<long>(q, N);
120+
acq_rel_test<unsigned long>(q, N);
121+
}
122+
acq_rel_test<long long>(q, N);
123+
acq_rel_test<unsigned long long>(q, N);
124+
acq_rel_test<double>(q, N);
125+
}
126+
}
127+
128+
if (is_supported(supported_memory_orders, memory_order::seq_cst)) {
129+
seq_cst_test<int>(q, N);
130+
seq_cst_test<unsigned int>(q, N);
131+
seq_cst_test<float>(q, N);
132+
if (sizeof(long) == 4) {
133+
// long is 32-bit
134+
seq_cst_test<long>(q, N);
135+
seq_cst_test<unsigned long>(q, N);
136+
}
137+
if (atomic64_support) {
138+
if (sizeof(long) == 8) {
139+
// long is 64-bit
140+
seq_cst_test<long>(q, N);
141+
seq_cst_test<unsigned long>(q, N);
142+
}
143+
seq_cst_test<long long>(q, N);
144+
seq_cst_test<unsigned long long>(q, N);
145+
seq_cst_test<double>(q, N);
146+
}
147+
}
148+
149+
std::cout << "Test passed." << std::endl;
150+
}

SYCL/AtomicRef/compare_exchange.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ template <typename T> void compare_exchange_test(queue q, size_t N) {
3131
cgh.parallel_for<compare_exchange_kernel<T>>(
3232
range<1>(N), [=](item<1> it) {
3333
size_t gid = it.get_id(0);
34-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
35-
ONEAPI::memory_scope::device,
36-
access::address_space::global_space>(exc[0]);
34+
auto atm =
35+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
36+
access::address_space::global_space>(exc[0]);
3737
T result = T(N); // Avoid copying pointer
3838
bool success = atm.compare_exchange_strong(result, (T)gid);
3939
if (success) {

SYCL/AtomicRef/exchange.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ template <typename T> void exchange_test(queue q, size_t N) {
2929
output_buf.template get_access<access::mode::discard_write>(cgh);
3030
cgh.parallel_for<exchange_kernel<T>>(range<1>(N), [=](item<1> it) {
3131
size_t gid = it.get_id(0);
32-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
33-
ONEAPI::memory_scope::device,
32+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3433
access::address_space::global_space>(exc[0]);
3534
out[gid] = atm.exchange(T(gid));
3635
});

SYCL/AtomicRef/load.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ template <typename T> void load_test(queue q, size_t N) {
2828
output_buf.template get_access<access::mode::discard_write>(cgh);
2929
cgh.parallel_for<load_kernel<T>>(range<1>(N), [=](item<1> it) {
3030
size_t gid = it.get_id(0);
31-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32-
ONEAPI::memory_scope::device,
31+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3332
access::address_space::global_space>(ld[0]);
3433
out[gid] = atm.load();
3534
});

SYCL/AtomicRef/max.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ template <typename T> void max_test(queue q, size_t N) {
2727
output_buf.template get_access<access::mode::discard_write>(cgh);
2828
cgh.parallel_for(range<1>(N), [=](item<1> it) {
2929
int gid = it.get_id(0);
30-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
31-
ONEAPI::memory_scope::device,
30+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3231
access::address_space::global_space>(val[0]);
3332

3433
// +1 accounts for lowest() returning 0 for unsigned types

SYCL/AtomicRef/min.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ template <typename T> void min_test(queue q, size_t N) {
2727
output_buf.template get_access<access::mode::discard_write>(cgh);
2828
cgh.parallel_for(range<1>(N), [=](item<1> it) {
2929
int gid = it.get_id(0);
30-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
31-
ONEAPI::memory_scope::device,
30+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3231
access::address_space::global_space>(val[0]);
3332
out[gid] = atm.fetch_min(T(gid));
3433
});

SYCL/AtomicRef/store.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ template <typename T> void store_test(queue q, size_t N) {
2222
auto st = store_buf.template get_access<access::mode::read_write>(cgh);
2323
cgh.parallel_for<store_kernel<T>>(range<1>(N), [=](item<1> it) {
2424
size_t gid = it.get_id(0);
25-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
26-
ONEAPI::memory_scope::device,
25+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
2726
access::address_space::global_space>(st[0]);
2827
atm.store(T(gid));
2928
});

0 commit comments

Comments
 (0)