Skip to content

Commit 1a9bba7

Browse files
Add lsc ESIMD embargo tests
Co-Authored-By: Nikita Rudenko <[email protected]> Co-Authored-By: Anton Zabaznov <[email protected]>
1 parent 6cb7d84 commit 1a9bba7

File tree

4 files changed

+411
-0
lines changed

4 files changed

+411
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*========================== begin_copyright_notice ============================
2+
INTEL CONFIDENTIAL
3+
Copyright (C) 2018-2021 Intel Corporation
4+
This software and the related documents are Intel copyrighted materials,
5+
and your use of them is governed by the express license under which they were
6+
provided to you ("License"). Unless the License provides otherwise,
7+
you may not use, modify, copy, publish, distribute, disclose or transmit this
8+
software or the related documents without Intel's prior written permission.
9+
This software and the related documents are provided as is, with no express or
10+
implied warranties, other than those that are expressly stated in the License.
11+
============================= end_copyright_notice ===========================*/
12+
13+
// This test checks 2d flat lsc intrinsics
14+
// TODO enable this test on PVC fullsim when LSC patch is merged
15+
// TODO enable on Windows and Level Zero
16+
// REQUIRES: linux && gpu && opencl
17+
// RUN: %clangxx -fsycl %s -o %t.out
18+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
19+
20+
#include <CL/sycl.hpp>
21+
#include <algorithm>
22+
#include <cmath>
23+
#include <numeric>
24+
#include <random>
25+
#include <sycl/ext/intel/experimental/esimd.hpp>
26+
27+
int main() {
28+
using namespace cl::sycl;
29+
using namespace sycl::ext::intel::experimental::esimd;
30+
unsigned data_height = 4;
31+
unsigned data_width = 9;
32+
unsigned data_pitch = 16;
33+
unsigned x = 0;
34+
unsigned y = 0;
35+
unsigned size = data_height * data_pitch;
36+
37+
auto GPUSelector = gpu_selector{};
38+
auto q = queue{GPUSelector};
39+
auto device = q.get_device();
40+
std::cout << "Device name: " << device.get_info<info::device::name>()
41+
<< std::endl;
42+
43+
auto *input = malloc_shared<int>(size, q);
44+
std::iota(input, input + size, 0);
45+
46+
constexpr unsigned Width = 4;
47+
constexpr unsigned Height = 4;
48+
constexpr unsigned NumBlocks = 1;
49+
auto *block_store = malloc_shared<int>(size, q);
50+
51+
auto *ref = new int[size];
52+
// Fill dst and ref data which is untouched with random values
53+
for (int i = 0; i < size; i++)
54+
block_store[i] = ref[i] = rand() % 128;
55+
56+
for (int i = 0; i < Height; i++) {
57+
for (int j = 0; j < Width; j++) {
58+
ref[y * data_pitch + i * data_pitch + x + j] =
59+
input[y * data_pitch + i * data_pitch + x + j];
60+
}
61+
}
62+
try {
63+
q.submit([&](handler &h) {
64+
h.parallel_for<class SimplestKernel>(
65+
range<1>{1}, [=](id<1> id) SYCL_ESIMD_KERNEL {
66+
lsc_flat_prefetch2d<int, Width, Height, NumBlocks, false, false,
67+
CacheHint::Uncached, CacheHint::Uncached>(
68+
input, (data_width * sizeof(int)) - 1, data_height - 1,
69+
(data_pitch * sizeof(int)) - 1, x, y);
70+
auto data =
71+
lsc_flat_load2d<int, Width, Height, NumBlocks, false, false,
72+
CacheHint::Uncached, CacheHint::Uncached>(
73+
input, (data_width * sizeof(int)) - 1, data_height - 1,
74+
(data_pitch * sizeof(int)) - 1, x, y);
75+
lsc_flat_store2d<int, Width, Height, false, false,
76+
CacheHint::Uncached, CacheHint::Uncached>(
77+
block_store, (data_width * sizeof(int)) - 1, data_height - 1,
78+
(data_pitch * sizeof(int)) - 1, x, y, data);
79+
});
80+
});
81+
q.wait();
82+
} catch (sycl::exception e) {
83+
std::cout << "SYCL exception caught: " << e.what();
84+
return 1;
85+
}
86+
87+
auto error = 0;
88+
for (auto i = 0; i < size; ++i)
89+
error += std::abs(ref[i] - block_store[i]);
90+
std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl;
91+
return error;
92+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*========================== begin_copyright_notice ============================
2+
INTEL CONFIDENTIAL
3+
Copyright (C) 2018-2021 Intel Corporation
4+
This software and the related documents are Intel copyrighted materials,
5+
and your use of them is governed by the express license under which they were
6+
provided to you ("License"). Unless the License provides otherwise,
7+
you may not use, modify, copy, publish, distribute, disclose or transmit this
8+
software or the related documents without Intel's prior written permission.
9+
This software and the related documents are provided as is, with no express or
10+
implied warranties, other than those that are expressly stated in the License.
11+
============================= end_copyright_notice ===========================*/
12+
13+
// This test checks 1d flat lsc intrinsics
14+
// TODO enable this test on PVC fullsim when LSC patch is merged
15+
// TODO enable on Windows and Level Zero
16+
// REQUIRES: linux && gpu && opencl
17+
// RUN: %clangxx -fsycl %s -o %t.out
18+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
19+
20+
#include <CL/sycl.hpp>
21+
#include <algorithm>
22+
#include <cmath>
23+
#include <numeric>
24+
#include <sycl/ext/intel/experimental/esimd.hpp>
25+
26+
int main() {
27+
using namespace cl::sycl;
28+
using namespace sycl::ext::intel::experimental::esimd;
29+
auto size = size_t{128};
30+
auto constexpr SIMDSize = unsigned{4};
31+
32+
auto GPUSelector = gpu_selector{};
33+
auto q = queue{GPUSelector};
34+
auto device = q.get_device();
35+
std::cout << "Device name: " << device.get_info<info::device::name>()
36+
<< std::endl;
37+
38+
auto *vec_0 = malloc_shared<int>(size, q);
39+
auto *vec_1 = malloc_shared<int>(size, q);
40+
auto *vec_2 = malloc_shared<int>(size, q);
41+
auto *vec_3 = malloc_shared<int>(size, q);
42+
auto *vec_4 = malloc_shared<int>(size, q);
43+
std::iota(vec_0, vec_0 + size, 0);
44+
std::iota(vec_1, vec_1 + size, 0);
45+
std::iota(vec_2, vec_2 + size, 0);
46+
std::iota(vec_3, vec_3 + size, 0);
47+
std::iota(vec_4, vec_4 + size, 0);
48+
49+
try {
50+
q.submit([&](handler &h) {
51+
h.parallel_for<class SimplestKernel>(
52+
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
53+
auto offset = id[0] * SIMDSize;
54+
auto offsets =
55+
simd<uint32_t, SIMDSize>(id * SIMDSize, 1) * sizeof(int);
56+
auto pred = simd<uint16_t, SIMDSize>(1);
57+
auto add = simd<uint16_t, SIMDSize>(5);
58+
auto compare = simd<uint32_t, SIMDSize>(id * SIMDSize, 1);
59+
auto swap = compare * 2;
60+
61+
lsc_flat_prefetch<int, SIMDSize, lsc_data_size::default_size,
62+
CacheHint::Uncached, CacheHint::Uncached>(vec_0 +
63+
offset);
64+
auto data_0 = lsc_flat_load<int, SIMDSize>(vec_0 + offset);
65+
lsc_flat_store<int, SIMDSize>(vec_0 + offset, data_0 * 2);
66+
67+
lsc_flat_prefetch<int, 1, lsc_data_size::default_size,
68+
CacheHint::Uncached, CacheHint::Uncached>(
69+
vec_1, offsets);
70+
auto data_1 = lsc_flat_load<int>(vec_1, offsets);
71+
lsc_flat_store<int>(vec_1, data_1 * 2, offsets);
72+
73+
lsc_flat_atomic<int, atomic_op::inc>(vec_2, offsets, pred);
74+
lsc_flat_atomic<int, atomic_op::add>(vec_3, offsets, add, pred);
75+
lsc_flat_atomic<int, atomic_op::cmpxchg>(vec_4, offsets, compare,
76+
swap, pred);
77+
});
78+
});
79+
q.wait();
80+
} catch (sycl::exception e) {
81+
std::cout << "SYCL exception caught: " << e.what();
82+
return 1;
83+
}
84+
85+
auto error = 0;
86+
for (auto i = 0; i != size; ++i) {
87+
error += std::abs(vec_0[i] - 2 * i);
88+
error += std::abs(vec_1[i] - 2 * i);
89+
error += std::abs(vec_2[i] - (i + 1));
90+
error += std::abs(vec_3[i] - (i + 5));
91+
error += std::abs(vec_4[i] - (i * 2));
92+
}
93+
std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl;
94+
return error;
95+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*========================== begin_copyright_notice ============================
2+
INTEL CONFIDENTIAL
3+
Copyright (C) 2018-2021 Intel Corporation
4+
This software and the related documents are Intel copyrighted materials,
5+
and your use of them is governed by the express license under which they were
6+
provided to you ("License"). Unless the License provides otherwise,
7+
you may not use, modify, copy, publish, distribute, disclose or transmit this
8+
software or the related documents without Intel's prior written permission.
9+
This software and the related documents are provided as is, with no express or
10+
implied warranties, other than those that are expressly stated in the License.
11+
============================= end_copyright_notice ===========================*/
12+
13+
// This test checks 1d slm lsc intrinsics
14+
// TODO enable this test on PVC fullsim when LSC patch is merged
15+
// TODO enable on Windows and Level Zero
16+
// REQUIRES: linux && gpu && opencl
17+
// RUN: %clangxx -fsycl %s -o %t.out
18+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
19+
20+
#include <CL/sycl.hpp>
21+
#include <algorithm>
22+
#include <cmath>
23+
#include <numeric>
24+
#include <sycl/ext/intel/experimental/esimd.hpp>
25+
26+
int main() {
27+
using namespace cl::sycl;
28+
using namespace sycl::ext::intel::experimental::esimd;
29+
auto size = size_t{128};
30+
auto constexpr SIMDSize = unsigned{4};
31+
32+
auto GPUSelector = gpu_selector{};
33+
auto q = queue{GPUSelector};
34+
auto device = q.get_device();
35+
std::cout << "Device name: " << device.get_info<info::device::name>()
36+
<< std::endl;
37+
38+
auto vec_0 = std::vector<int>(size);
39+
auto vec_1 = std::vector<int>(size);
40+
auto vec_2 = std::vector<int>(size);
41+
auto vec_3 = std::vector<int>(size);
42+
auto vec_4 = std::vector<int>(size);
43+
auto buf_0 = buffer{vec_0};
44+
auto buf_1 = buffer{vec_1};
45+
auto buf_2 = buffer{vec_2};
46+
auto buf_3 = buffer{vec_3};
47+
auto buf_4 = buffer{vec_4};
48+
49+
try {
50+
q.submit([&](handler &h) {
51+
auto access_0 = buf_0.template get_access<access::mode::read_write>(h);
52+
auto access_1 = buf_1.template get_access<access::mode::read_write>(h);
53+
auto access_2 = buf_2.template get_access<access::mode::read_write>(h);
54+
auto access_3 = buf_3.template get_access<access::mode::read_write>(h);
55+
auto access_4 = buf_4.template get_access<access::mode::read_write>(h);
56+
h.parallel_for<class SimplestKernel>(
57+
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
58+
auto offset = id * SIMDSize * sizeof(int);
59+
auto offsets =
60+
simd<uint32_t, SIMDSize>(id * SIMDSize, 1) * sizeof(int);
61+
auto data = simd<int, SIMDSize>(id * SIMDSize, 1);
62+
auto pred = simd<uint16_t, SIMDSize>(1);
63+
auto add = simd<uint16_t, SIMDSize>(5);
64+
auto compare = simd<uint32_t, SIMDSize>(id * SIMDSize, 1);
65+
auto swap = compare * 2;
66+
67+
slm_init(4096);
68+
lsc_slm_store<int, SIMDSize>(data * 2, offset);
69+
auto data_0 = lsc_slm_load<int, SIMDSize>(offset);
70+
lsc_surf_store<int, SIMDSize>(data_0, access_0, offset);
71+
72+
lsc_slm_store<int>(data * 2, offsets);
73+
auto data_1 = lsc_slm_load<int>(offsets);
74+
lsc_surf_store<int, SIMDSize>(data_1, access_1, offset);
75+
76+
lsc_slm_store<int, SIMDSize>(data, offset);
77+
lsc_slm_atomic<int, atomic_op::inc>(offsets, pred);
78+
auto data_2 = lsc_slm_load<int, SIMDSize>(offset);
79+
lsc_surf_store<int, SIMDSize>(data_2, access_2, offset);
80+
81+
lsc_slm_store<int, SIMDSize>(data, offset);
82+
lsc_slm_atomic<int, atomic_op::add>(offsets, add, pred);
83+
auto data_3 = lsc_slm_load<int, SIMDSize>(offset);
84+
lsc_surf_store<int, SIMDSize>(data_3, access_3, offset);
85+
86+
lsc_slm_store<int, SIMDSize>(data, offset);
87+
lsc_slm_atomic<int, atomic_op::cmpxchg>(offsets, compare, swap,
88+
pred);
89+
auto data_4 = lsc_slm_load<int, SIMDSize>(offset);
90+
lsc_surf_store<int, SIMDSize>(data_4, access_4, offset);
91+
});
92+
});
93+
q.wait();
94+
buf_0.template get_access<access::mode::read_write>();
95+
buf_1.template get_access<access::mode::read_write>();
96+
buf_2.template get_access<access::mode::read_write>();
97+
buf_3.template get_access<access::mode::read_write>();
98+
buf_4.template get_access<access::mode::read_write>();
99+
} catch (sycl::exception e) {
100+
std::cout << "SYCL exception caught: " << e.what();
101+
return 1;
102+
}
103+
104+
auto error = 0;
105+
for (auto i = 0; i != size; ++i) {
106+
error += std::abs(vec_0[i] - (i * 2));
107+
error += std::abs(vec_1[i] - (i * 2));
108+
error += std::abs(vec_2[i] - (i + 1));
109+
error += std::abs(vec_3[i] - (i + 5));
110+
error += std::abs(vec_4[i] - (i * 2));
111+
}
112+
std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl;
113+
return error;
114+
}

0 commit comments

Comments
 (0)