Add lsc ESIMD embargo tests

NikitaRudenkoIntel · azabazno · NikitaRudenkoIntel · commit 1a9bba7db91d · 2021-07-01T20:04:28.000+03:00
Co-Authored-By: Nikita Rudenko <nikita.rudenko@intel.com>
Co-Authored-By: Anton Zabaznov <anton.zabaznov@intel.com>
diff --git a/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_flat_2d_pvc.cpp b/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_flat_2d_pvc.cpp
@@ -0,0 +1,113 @@
+/*========================== begin_copyright_notice ============================
+INTEL CONFIDENTIAL
+Copyright (C) 2018-2021 Intel Corporation
+This software and the related documents are Intel copyrighted materials,
+and your use of them is governed by the express license under which they were
+provided to you ("License"). Unless the License provides otherwise,
+you may not use, modify, copy, publish, distribute, disclose or transmit this
+software or the related documents without Intel's prior written permission.
+This software and the related documents are provided as is, with no express or
+implied warranties, other than those that are expressly stated in the License.
+============================= end_copyright_notice ===========================*/
+
+// This test checks 2d flat lsc intrinsics
+// TODO enable this test on PVC fullsim when LSC patch is merged
+// TODO enable on Windows and Level Zero
+// REQUIRES: linux && gpu && opencl
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <sycl/ext/intel/experimental/esimd.hpp>
+#include <vector>
+
+// Copies a Width x Height block of ints from one USM surface to another with
+// the 2d flat lsc intrinsics and compares the result against a host-built
+// reference. Returns 0 on success and 1 on a mismatch or SYCL exception.
+int main() {
+  using namespace cl::sycl;
+  using namespace sycl::ext::intel::experimental::esimd;
+  // Surface dimensions in elements; the intrinsics below receive them
+  // converted to bytes (where applicable) minus one.
+  unsigned data_height = 4;
+  unsigned data_width = 9;
+  unsigned data_pitch = 16;
+  // Coordinates of the block passed to the intrinsics.
+  unsigned x = 0;
+  unsigned y = 0;
+  unsigned size = data_height * data_pitch;
+
+  auto GPUSelector = gpu_selector{};
+  auto q = queue{GPUSelector};
+  auto device = q.get_device();
+  std::cout << "Device name: " << device.get_info<info::device::name>()
+            << std::endl;
+
+  auto *input = malloc_shared<int>(size, q);
+  std::iota(input, input + size, 0);
+
+  constexpr unsigned Width = 4;
+  constexpr unsigned Height = 4;
+  constexpr unsigned NumBlocks = 1;
+  auto *block_store = malloc_shared<int>(size, q);
+
+  // Releases the USM allocations; invoked on every exit path below.
+  const auto release_usm = [&] {
+    sycl::free(input, q);
+    sycl::free(block_store, q);
+  };
+
+  // Host reference, owned by a vector so it is released automatically.
+  std::vector<int> ref(size);
+  // Give the destination and the reference identical random contents so that
+  // elements the kernel is not expected to write still compare equal.
+  for (unsigned i = 0; i < size; ++i)
+    block_store[i] = ref[i] = std::rand() % 128;
+
+  // Within the block the destination is expected to match the input.
+  for (unsigned i = 0; i < Height; ++i) {
+    for (unsigned j = 0; j < Width; ++j) {
+      const unsigned idx = (y + i) * data_pitch + x + j;
+      ref[idx] = input[idx];
+    }
+  }
+  try {
+    q.submit([&](handler &h) {
+      h.parallel_for<class SimplestKernel>(
+          range<1>{1}, [=](id<1> id) SYCL_ESIMD_KERNEL {
+            lsc_flat_prefetch2d<int, Width, Height, NumBlocks, false, false,
+                                CacheHint::Uncached, CacheHint::Uncached>(
+                input, (data_width * sizeof(int)) - 1, data_height - 1,
+                (data_pitch * sizeof(int)) - 1, x, y);
+            auto data =
+                lsc_flat_load2d<int, Width, Height, NumBlocks, false, false,
+                                CacheHint::Uncached, CacheHint::Uncached>(
+                    input, (data_width * sizeof(int)) - 1, data_height - 1,
+                    (data_pitch * sizeof(int)) - 1, x, y);
+            lsc_flat_store2d<int, Width, Height, false, false,
+                             CacheHint::Uncached, CacheHint::Uncached>(
+                block_store, (data_width * sizeof(int)) - 1, data_height - 1,
+                (data_pitch * sizeof(int)) - 1, x, y, data);
+          });
+    });
+    q.wait();
+  } catch (const sycl::exception &e) {
+    std::cout << "SYCL exception caught: " << e.what() << std::endl;
+    release_usm();
+    return 1;
+  }
+
+  // Compare for equality instead of returning a sum of differences: an exit
+  // status keeps only its low 8 bits on POSIX, so a sum that is a multiple of
+  // 256 would be reported as success.
+  const bool passed = std::equal(ref.begin(), ref.end(), block_store);
+  std::cout << (passed ? "PASSED" : "FAILED") << std::endl;
+  release_usm();
+  return passed ? 0 : 1;
+}
diff --git a/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_flat_pvc.cpp b/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_flat_pvc.cpp
@@ -0,0 +1,115 @@
+/*========================== begin_copyright_notice ============================
+INTEL CONFIDENTIAL
+Copyright (C) 2018-2021 Intel Corporation
+This software and the related documents are Intel copyrighted materials,
+and your use of them is governed by the express license under which they were
+provided to you ("License"). Unless the License provides otherwise,
+you may not use, modify, copy, publish, distribute, disclose or transmit this
+software or the related documents without Intel's prior written permission.
+This software and the related documents are provided as is, with no express or
+implied warranties, other than those that are expressly stated in the License.
+============================= end_copyright_notice ===========================*/
+
+// This test checks 1d flat lsc intrinsics
+// TODO enable this test on PVC fullsim when LSC patch is merged
+// TODO enable on Windows and Level Zero
+// REQUIRES: linux && gpu && opencl
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <sycl/ext/intel/experimental/esimd.hpp>
+
+// Exercises the 1d flat lsc prefetch, load, store and atomic intrinsics on USM
+// arrays and checks the results on the host. Returns 0 on success and 1 on a
+// mismatch or SYCL exception.
+int main() {
+  using namespace cl::sycl;
+  using namespace sycl::ext::intel::experimental::esimd;
+  auto size = size_t{128};
+  auto constexpr SIMDSize = unsigned{4};
+
+  auto GPUSelector = gpu_selector{};
+  auto q = queue{GPUSelector};
+  auto device = q.get_device();
+  std::cout << "Device name: " << device.get_info<info::device::name>()
+            << std::endl;
+
+  // One USM array per group of intrinsics, each initialised to 0, 1, 2, ...
+  auto *vec_0 = malloc_shared<int>(size, q);
+  auto *vec_1 = malloc_shared<int>(size, q);
+  auto *vec_2 = malloc_shared<int>(size, q);
+  auto *vec_3 = malloc_shared<int>(size, q);
+  auto *vec_4 = malloc_shared<int>(size, q);
+  std::iota(vec_0, vec_0 + size, 0);
+  std::iota(vec_1, vec_1 + size, 0);
+  std::iota(vec_2, vec_2 + size, 0);
+  std::iota(vec_3, vec_3 + size, 0);
+  std::iota(vec_4, vec_4 + size, 0);
+
+  // Releases the USM allocations; invoked on every exit path below.
+  const auto release_usm = [&] {
+    sycl::free(vec_0, q);
+    sycl::free(vec_1, q);
+    sycl::free(vec_2, q);
+    sycl::free(vec_3, q);
+    sycl::free(vec_4, q);
+  };
+
+  try {
+    q.submit([&](handler &h) {
+      h.parallel_for<class SimplestKernel>(
+          range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
+            auto offset = id[0] * SIMDSize;
+            auto offsets =
+                simd<uint32_t, SIMDSize>(id * SIMDSize, 1) * sizeof(int);
+            auto pred = simd<uint16_t, SIMDSize>(1);
+            auto add = simd<uint16_t, SIMDSize>(5);
+            auto compare = simd<uint32_t, SIMDSize>(id * SIMDSize, 1);
+            auto swap = compare * 2;
+
+            lsc_flat_prefetch<int, SIMDSize, lsc_data_size::default_size,
+                              CacheHint::Uncached, CacheHint::Uncached>(vec_0 +
+                                                                        offset);
+            auto data_0 = lsc_flat_load<int, SIMDSize>(vec_0 + offset);
+            lsc_flat_store<int, SIMDSize>(vec_0 + offset, data_0 * 2);
+
+            lsc_flat_prefetch<int, 1, lsc_data_size::default_size,
+                              CacheHint::Uncached, CacheHint::Uncached>(
+                vec_1, offsets);
+            auto data_1 = lsc_flat_load<int>(vec_1, offsets);
+            lsc_flat_store<int>(vec_1, data_1 * 2, offsets);
+
+            lsc_flat_atomic<int, atomic_op::inc>(vec_2, offsets, pred);
+            lsc_flat_atomic<int, atomic_op::add>(vec_3, offsets, add, pred);
+            lsc_flat_atomic<int, atomic_op::cmpxchg>(vec_4, offsets, compare,
+                                                     swap, pred);
+          });
+    });
+    q.wait();
+  } catch (const sycl::exception &e) {
+    std::cout << "SYCL exception caught: " << e.what() << std::endl;
+    release_usm();
+    return 1;
+  }
+
+  // Check every element for equality instead of returning a sum of
+  // differences: an exit status keeps only its low 8 bits on POSIX, so a sum
+  // that is a multiple of 256 would be reported as success.
+  const int count = static_cast<int>(size);
+  bool passed = true;
+  for (int i = 0; i < count; ++i) {
+    passed &= vec_0[i] == 2 * i;
+    passed &= vec_1[i] == 2 * i;
+    passed &= vec_2[i] == i + 1;
+    passed &= vec_3[i] == i + 5;
+    passed &= vec_4[i] == i * 2;
+  }
+  std::cout << (passed ? "PASSED" : "FAILED") << std::endl;
+  release_usm();
+  return passed ? 0 : 1;
+}
diff --git a/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_slm_pvc.cpp b/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_slm_pvc.cpp
@@ -0,0 +1,123 @@
+/*========================== begin_copyright_notice ============================
+INTEL CONFIDENTIAL
+Copyright (C) 2018-2021 Intel Corporation
+This software and the related documents are Intel copyrighted materials,
+and your use of them is governed by the express license under which they were
+provided to you ("License"). Unless the License provides otherwise,
+you may not use, modify, copy, publish, distribute, disclose or transmit this
+software or the related documents without Intel's prior written permission.
+This software and the related documents are provided as is, with no express or
+implied warranties, other than those that are expressly stated in the License.
+============================= end_copyright_notice ===========================*/
+
+// This test checks 1d slm lsc intrinsics
+// TODO enable this test on PVC fullsim when LSC patch is merged
+// TODO enable on Windows and Level Zero
+// REQUIRES: linux && gpu && opencl
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <sycl/ext/intel/experimental/esimd.hpp>
+#include <vector>
+
+// Exercises the 1d slm lsc load, store and atomic intrinsics, copies the
+// results out through buffer accessors and checks them on the host. Returns 0
+// on success and 1 on a mismatch or SYCL exception.
+int main() {
+  using namespace cl::sycl;
+  using namespace sycl::ext::intel::experimental::esimd;
+  auto size = size_t{128};
+  auto constexpr SIMDSize = unsigned{4};
+
+  auto GPUSelector = gpu_selector{};
+  auto q = queue{GPUSelector};
+  auto device = q.get_device();
+  std::cout << "Device name: " << device.get_info<info::device::name>()
+            << std::endl;
+
+  // Host storage that receives the result of each group of intrinsics.
+  auto vec_0 = std::vector<int>(size);
+  auto vec_1 = std::vector<int>(size);
+  auto vec_2 = std::vector<int>(size);
+  auto vec_3 = std::vector<int>(size);
+  auto vec_4 = std::vector<int>(size);
+
+  try {
+    // The buffers live only inside this block: per the SYCL buffer rules the
+    // vectors must not be read while the buffers exist, and destroying the
+    // buffers writes the results back before the verification loop runs.
+    auto buf_0 = buffer{vec_0};
+    auto buf_1 = buffer{vec_1};
+    auto buf_2 = buffer{vec_2};
+    auto buf_3 = buffer{vec_3};
+    auto buf_4 = buffer{vec_4};
+
+    q.submit([&](handler &h) {
+      auto access_0 = buf_0.template get_access<access::mode::read_write>(h);
+      auto access_1 = buf_1.template get_access<access::mode::read_write>(h);
+      auto access_2 = buf_2.template get_access<access::mode::read_write>(h);
+      auto access_3 = buf_3.template get_access<access::mode::read_write>(h);
+      auto access_4 = buf_4.template get_access<access::mode::read_write>(h);
+      h.parallel_for<class SimplestKernel>(
+          range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
+            auto offset = id * SIMDSize * sizeof(int);
+            auto offsets =
+                simd<uint32_t, SIMDSize>(id * SIMDSize, 1) * sizeof(int);
+            auto data = simd<int, SIMDSize>(id * SIMDSize, 1);
+            auto pred = simd<uint16_t, SIMDSize>(1);
+            auto add = simd<uint16_t, SIMDSize>(5);
+            auto compare = simd<uint32_t, SIMDSize>(id * SIMDSize, 1);
+            auto swap = compare * 2;
+
+            slm_init(4096);
+            lsc_slm_store<int, SIMDSize>(data * 2, offset);
+            auto data_0 = lsc_slm_load<int, SIMDSize>(offset);
+            lsc_surf_store<int, SIMDSize>(data_0, access_0, offset);
+
+            lsc_slm_store<int>(data * 2, offsets);
+            auto data_1 = lsc_slm_load<int>(offsets);
+            lsc_surf_store<int, SIMDSize>(data_1, access_1, offset);
+
+            lsc_slm_store<int, SIMDSize>(data, offset);
+            lsc_slm_atomic<int, atomic_op::inc>(offsets, pred);
+            auto data_2 = lsc_slm_load<int, SIMDSize>(offset);
+            lsc_surf_store<int, SIMDSize>(data_2, access_2, offset);
+
+            lsc_slm_store<int, SIMDSize>(data, offset);
+            lsc_slm_atomic<int, atomic_op::add>(offsets, add, pred);
+            auto data_3 = lsc_slm_load<int, SIMDSize>(offset);
+            lsc_surf_store<int, SIMDSize>(data_3, access_3, offset);
+
+            lsc_slm_store<int, SIMDSize>(data, offset);
+            lsc_slm_atomic<int, atomic_op::cmpxchg>(offsets, compare, swap,
+                                                    pred);
+            auto data_4 = lsc_slm_load<int, SIMDSize>(offset);
+            lsc_surf_store<int, SIMDSize>(data_4, access_4, offset);
+          });
+    });
+    q.wait();
+  } catch (const sycl::exception &e) {
+    std::cout << "SYCL exception caught: " << e.what() << std::endl;
+    return 1;
+  }
+
+  // Check every element for equality instead of returning a sum of
+  // differences: an exit status keeps only its low 8 bits on POSIX, so a sum
+  // that is a multiple of 256 would be reported as success.
+  const int count = static_cast<int>(size);
+  bool passed = true;
+  for (int i = 0; i < count; ++i) {
+    passed &= vec_0[i] == i * 2;
+    passed &= vec_1[i] == i * 2;
+    passed &= vec_2[i] == i + 1;
+    passed &= vec_3[i] == i + 5;
+    passed &= vec_4[i] == i * 2;
+  }
+  std::cout << (passed ? "PASSED" : "FAILED") << std::endl;
+  return passed ? 0 : 1;
+}
diff --git a/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_surf_pvc.cpp b/SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc_surf_pvc.cpp