intel
diff --git a/‎SYCL/AtomicRef/accessor.cpp
Lines changed: 111 additions & 0 deletions b/‎SYCL/AtomicRef/accessor.cpp
Lines changed: 111 additions & 0 deletions
diff --git a/‎SYCL/AtomicRef/add.cpp
Lines changed: 198 additions & 0 deletions b/‎SYCL/AtomicRef/add.cpp
Lines changed: 198 additions & 0 deletions
diff --git a/‎SYCL/AtomicRef/compare_exchange.cpp
Lines changed: 78 additions & 0 deletions b/‎SYCL/AtomicRef/compare_exchange.cpp
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,111 @@
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <numeric>
+#include <vector>
+using namespace sycl;
+using namespace sycl::ONEAPI;
+
+// Equivalent to add_test from add.cpp
+// Uses atomic_accessor instead of atomic_ref
+template <typename T> void accessor_test(queue q, size_t N) {
+  T sum = 0;
+  std::vector<T> output(N, 0);
+  {
+    buffer<T> sum_buf(&sum, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+#if __cplusplus > 201402L
+      static_assert(
+          std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
+                                                device_scope)),
+                       atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                       ONEAPI::memory_scope::device>>::value,
+          "atomic_accessor type incorrectly deduced");
+#endif
+      auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                 ONEAPI::memory_scope::device>(sum_buf, cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for(range<1>(N), [=](item<1> it) {
+        int gid = it.get_id(0);
+        static_assert(
+            std::is_same<
+                decltype(sum[0]),
+                atomic_ref<T, ONEAPI::memory_order::relaxed,
+                           ONEAPI::memory_scope::device,
+                           access::address_space::global_space>>::value,
+            "atomic_accessor returns incorrect atomic_ref");
+        out[gid] = sum[0].fetch_add(T(1));
+      });
+    });
+  }
+
+  // All work-items increment by 1, so final value should be equal to N
+  assert(sum == N);
+
+  // Intermediate values should be unique
+  std::sort(output.begin(), output.end());
+  assert(std::unique(output.begin(), output.end()) == output.end());
+
+  // Fetch returns original value: will be in [0, N-1]
+  auto min_e = output[0];
+  auto max_e = output[output.size() - 1];
+  assert(min_e == 0 && max_e == N - 1);
+}
+
+// Simplified form of accessor_test for local memory
+template <typename T>
+void local_accessor_test(queue q, size_t N, size_t L = 8) {
+  assert(N % L == 0);
+  std::vector<T> output(N / L, 0);
+  {
+    buffer<T> output_buf(output.data(), output.size());
+    q.submit([&](handler &cgh) {
+      auto sum =
+          atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                          ONEAPI::memory_scope::device, access::target::local>(
+              1, cgh);
+      auto out = output_buf.template get_access<access::mode::read_write>(cgh);
+      cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
+        int grp = it.get_group(0);
+        sum[0].store(0);
+        it.barrier();
+        static_assert(
+            std::is_same<decltype(sum[0]),
+                         atomic_ref<T, ONEAPI::memory_order::relaxed,
+                                    ONEAPI::memory_scope::device,
+                                    access::address_space::local_space>>::value,
+            "local atomic_accessor returns incorrect atomic_ref");
+        T result = sum[0].fetch_add(T(1));
+        if (result == it.get_local_range(0) - 1) {
+          out[grp] = result;
+        }
+      });
+    });
+  }
+
+  // All work-items increment by 1, and last in the group writes out old value
+  // All values should be L-1
+  assert(std::all_of(output.begin(), output.end(),
+                     [=](T x) { return x == L - 1; }));
+}
+
+int main() {
+  queue q;
+  constexpr int N = 32;
+  accessor_test<int>(q, N);
+  // TODO: Enable local accessor test for host when barrier is supported
+  if (!q.get_device().is_host()) {
+    local_accessor_test<int>(q, N);
+  }
+  std::cout << "Test passed." << std::endl;
+}
@@ -0,0 +1,198 @@
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <numeric>
+#include <vector>
+using namespace sycl;
+using namespace sycl::ONEAPI;
+
+template <typename T, typename Difference = T>
+void add_fetch_test(queue q, size_t N) {
+  T sum = 0;
+  std::vector<T> output(N);
+  std::fill(output.begin(), output.end(), T(0));
+  {
+    buffer<T> sum_buf(&sum, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+      auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for(range<1>(N), [=](item<1> it) {
+        int gid = it.get_id(0);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
+        out[gid] = atm.fetch_add(Difference(1));
+      });
+    });
+  }
+
+  // All work-items increment by 1, so final value should be equal to N
+  assert(sum == T(N));
+
+  // Fetch returns original value: will be in [0, N-1]
+  auto min_e = std::min_element(output.begin(), output.end());
+  auto max_e = std::max_element(output.begin(), output.end());
+  assert(*min_e == T(0) && *max_e == T(N - 1));
+
+  // Intermediate values should be unique
+  std::sort(output.begin(), output.end());
+  assert(std::unique(output.begin(), output.end()) == output.end());
+}
+
+template <typename T, typename Difference = T>
+void add_plus_equal_test(queue q, size_t N) {
+  T sum = 0;
+  std::vector<T> output(N);
+  std::fill(output.begin(), output.end(), T(0));
+  {
+    buffer<T> sum_buf(&sum, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+      auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for(range<1>(N), [=](item<1> it) {
+        int gid = it.get_id(0);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
+        out[gid] = atm += Difference(1);
+      });
+    });
+  }
+
+  // All work-items increment by 1, so final value should be equal to N
+  assert(sum == T(N));
+
+  // += returns updated value: will be in [1, N]
+  auto min_e = std::min_element(output.begin(), output.end());
+  auto max_e = std::max_element(output.begin(), output.end());
+  assert(*min_e == T(1) && *max_e == T(N));
+
+  // Intermediate values should be unique
+  std::sort(output.begin(), output.end());
+  assert(std::unique(output.begin(), output.end()) == output.end());
+}
+
+template <typename T, typename Difference = T>
+void add_pre_inc_test(queue q, size_t N) {
+  T sum = 0;
+  std::vector<T> output(N);
+  std::fill(output.begin(), output.end(), T(0));
+  {
+    buffer<T> sum_buf(&sum, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+      auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for(range<1>(N), [=](item<1> it) {
+        int gid = it.get_id(0);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
+        out[gid] = ++atm;
+      });
+    });
+  }
+
+  // All work-items increment by 1, so final value should be equal to N
+  assert(sum == T(N));
+
+  // Pre-increment returns updated value: will be in [1, N]
+  auto min_e = std::min_element(output.begin(), output.end());
+  auto max_e = std::max_element(output.begin(), output.end());
+  assert(*min_e == T(1) && *max_e == T(N));
+
+  // Intermediate values should be unique
+  std::sort(output.begin(), output.end());
+  assert(std::unique(output.begin(), output.end()) == output.end());
+}
+
+template <typename T, typename Difference = T>
+void add_post_inc_test(queue q, size_t N) {
+  T sum = 0;
+  std::vector<T> output(N);
+  std::fill(output.begin(), output.end(), T(0));
+  {
+    buffer<T> sum_buf(&sum, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+      auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for(range<1>(N), [=](item<1> it) {
+        int gid = it.get_id(0);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
+        out[gid] = atm++;
+      });
+    });
+  }
+
+  // All work-items increment by 1, so final value should be equal to N
+  assert(sum == T(N));
+
+  // Post-increment returns original value: will be in [0, N-1]
+  auto min_e = std::min_element(output.begin(), output.end());
+  auto max_e = std::max_element(output.begin(), output.end());
+  assert(*min_e == T(0) && *max_e == T(N - 1));
+
+  // Intermediate values should be unique
+  std::sort(output.begin(), output.end());
+  assert(std::unique(output.begin(), output.end()) == output.end());
+}
+
+template <typename T, typename Difference = T>
+void add_test(queue q, size_t N) {
+  add_fetch_test<T, Difference>(q, N);
+  add_plus_equal_test<T, Difference>(q, N);
+  add_pre_inc_test<T, Difference>(q, N);
+  add_post_inc_test<T, Difference>(q, N);
+}
+
+// Floating-point types do not support pre- or post-increment
+template <> void add_test<float>(queue q, size_t N) {
+  add_fetch_test<float>(q, N);
+  add_plus_equal_test<float>(q, N);
+}
+template <> void add_test<double>(queue q, size_t N) {
+  add_fetch_test<double>(q, N);
+  add_plus_equal_test<double>(q, N);
+}
+
+int main() {
+  queue q;
+  std::string version = q.get_device().get_info<info::device::version>();
+  if (version < std::string("2.0")) {
+    std::cout << "Skipping test\n";
+    return 0;
+  }
+
+  constexpr int N = 32;
+  add_test<int>(q, N);
+  add_test<unsigned int>(q, N);
+  add_test<long>(q, N);
+  add_test<unsigned long>(q, N);
+  add_test<long long>(q, N);
+  add_test<unsigned long long>(q, N);
+  add_test<float>(q, N);
+  add_test<double>(q, N);
+  add_test<char *, ptrdiff_t>(q, N);
+
+  std::cout << "Test passed." << std::endl;
+}
@@ -0,0 +1,78 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+#include <vector>
+using namespace sycl;
+using namespace sycl::ONEAPI;
+
+template <typename T> class compare_exchange_kernel;
+
+template <typename T> void compare_exchange_test(queue q, size_t N) {
+  const T initial = T(N);
+  T compare_exchange = initial;
+  std::vector<T> output(N);
+  std::fill(output.begin(), output.end(), T(0));
+  {
+    buffer<T> compare_exchange_buf(&compare_exchange, 1);
+    buffer<T> output_buf(output.data(), output.size());
+
+    q.submit([&](handler &cgh) {
+      auto exc =
+          compare_exchange_buf.template get_access<access::mode::read_write>(
+              cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
+      cgh.parallel_for<compare_exchange_kernel<T>>(
+          range<1>(N), [=](item<1> it) {
+            size_t gid = it.get_id(0);
+            auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                                  ONEAPI::memory_scope::device,
+                                  access::address_space::global_space>(exc[0]);
+            T result = T(N); // Avoid copying pointer
+            bool success = atm.compare_exchange_strong(result, (T)gid);
+            if (success) {
+              out[gid] = result;
+            } else {
+              out[gid] = T(gid);
+            }
+          });
+    });
+  }
+
+  // Only one work-item should have received the initial sentinel value
+  assert(std::count(output.begin(), output.end(), initial) == 1);
+
+  // All other values should be the index itself or the sentinel value
+  for (size_t i = 0; i < N; ++i) {
+    assert(output[i] == T(i) || output[i] == initial);
+  }
+}
+
+int main() {
+  queue q;
+  std::string version = q.get_device().get_info<info::device::version>();
+  if (version < std::string("2.0")) {
+    std::cout << "Skipping test\n";
+    return 0;
+  }
+
+  constexpr int N = 32;
+  compare_exchange_test<int>(q, N);
+  compare_exchange_test<unsigned int>(q, N);
+  compare_exchange_test<long>(q, N);
+  compare_exchange_test<unsigned long>(q, N);
+  compare_exchange_test<long long>(q, N);
+  compare_exchange_test<unsigned long long>(q, N);
+  compare_exchange_test<float>(q, N);
+  compare_exchange_test<double>(q, N);
+  compare_exchange_test<char *>(q, N);
+
+  std::cout << "Test passed." << std::endl;
+}