Skip to content

Commit 2d046fe

Browse files
authored
Merge pull request intel#1235 from fineg74/scsel-cfl-11-xmain-topic
[SYCL][ESIMD] Lsc block load/store predicate test (intel#1194)
2 parents c5b1181 + fc79e2b commit 2d046fe

File tree

2 files changed

+302
-0
lines changed

2 files changed

+302
-0
lines changed

SYCL/ESIMD/lsc/lsc_predicate.cpp

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
//==------------ lsc_predicate.cpp - DPC++ ESIMD on-device test ------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu-intel-pvc || esimd_emulator
9+
// RUN: %clangxx -fsycl %s -o %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
12+
// The test checks functionality of the lsc_block_load, lsc_block_store
13+
// with newly introduced predicate parameter.
14+
15+
#include "../esimd_test_utils.hpp"
16+
17+
#include <algorithm>
18+
#include <cmath>
19+
#include <numeric>
20+
#include <sycl/ext/intel/esimd.hpp>
21+
#include <sycl/sycl.hpp>
22+
23+
using namespace sycl;
24+
using namespace sycl::ext::intel::esimd;
25+
using namespace sycl::ext::intel::experimental::esimd;
26+
27+
template <unsigned SIMDSize> int testAccessor(queue q) {
28+
auto size = size_t{128};
29+
30+
auto vec_0 = std::vector<int>(size);
31+
auto vec_1 = std::vector<int>(size);
32+
auto vec_2 = std::vector<int>(size);
33+
auto vec_3 = std::vector<int>(size);
34+
35+
std::iota(vec_0.begin(), vec_0.end(), 0);
36+
std::iota(vec_1.begin(), vec_1.end(), 0);
37+
std::iota(vec_2.begin(), vec_2.end(), 0);
38+
std::iota(vec_3.begin(), vec_3.end(), 0);
39+
auto buf_0 = buffer{vec_0};
40+
auto buf_1 = buffer{vec_1};
41+
auto buf_2 = buffer{vec_2};
42+
auto buf_3 = buffer{vec_3};
43+
44+
try {
45+
q.submit([&](handler &h) {
46+
auto access_0 = buf_0.template get_access<access::mode::read_write>(h);
47+
auto access_1 = buf_1.template get_access<access::mode::read_write>(h);
48+
auto access_2 = buf_2.template get_access<access::mode::read_write>(h);
49+
auto access_3 = buf_3.template get_access<access::mode::read_write>(h);
50+
51+
h.parallel_for(
52+
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
53+
auto offset = id[0] * SIMDSize * sizeof(int);
54+
auto offsets = simd<uint32_t, SIMDSize>(id * SIMDSize * sizeof(int),
55+
sizeof(int));
56+
auto pred_enable = simd_mask<1>(1);
57+
auto pred_disable = simd_mask<1>(0);
58+
59+
auto data_0 =
60+
lsc_block_load<int, SIMDSize>(access_0, offset, pred_enable);
61+
lsc_block_store<int, SIMDSize>(access_0, offset, data_0 * 2,
62+
pred_enable);
63+
64+
auto data_1 =
65+
lsc_block_load<int, SIMDSize>(access_1, offset, pred_disable);
66+
lsc_block_store<int, SIMDSize>(access_1, offset, data_1 * 2,
67+
pred_enable);
68+
69+
auto data_2 =
70+
lsc_block_load<int, SIMDSize>(access_2, offset, pred_enable);
71+
lsc_block_store<int, SIMDSize>(access_2, offset, data_2 * 2,
72+
pred_disable);
73+
74+
auto data_3 =
75+
lsc_block_load<int, SIMDSize>(access_3, offset, pred_disable);
76+
lsc_block_store<int, SIMDSize>(access_3, offset, data_3 * 2,
77+
pred_disable);
78+
});
79+
});
80+
q.wait();
81+
buf_0.template get_access<access::mode::read_write>();
82+
buf_1.template get_access<access::mode::read_write>();
83+
buf_2.template get_access<access::mode::read_write>();
84+
buf_3.template get_access<access::mode::read_write>();
85+
} catch (sycl::exception e) {
86+
std::cout << "SYCL exception caught: " << e.what();
87+
return 1;
88+
}
89+
90+
auto error = 0;
91+
for (auto i = 0; i != size; ++i) {
92+
error += vec_0[i] != 2 * i;
93+
error += vec_1[i] > 0;
94+
error += vec_2[i] != i;
95+
error += vec_3[i] != i;
96+
}
97+
std::cout << "Accessor lsc predicate test ";
98+
std::cout << (error != 0 ? "FAILED" : "passed") << std::endl;
99+
return error;
100+
}
101+
102+
template <unsigned SIMDSize> int testUSM(queue q) {
103+
auto size = size_t{128};
104+
105+
auto *vec_0 = malloc_shared<int>(size, q);
106+
auto *vec_1 = malloc_shared<int>(size, q);
107+
auto *vec_2 = malloc_shared<int>(size, q);
108+
auto *vec_3 = malloc_shared<int>(size, q);
109+
std::iota(vec_0, vec_0 + size, 0);
110+
std::iota(vec_1, vec_1 + size, 0);
111+
std::iota(vec_2, vec_2 + size, 0);
112+
std::iota(vec_3, vec_3 + size, 0);
113+
114+
try {
115+
q.submit([&](handler &h) {
116+
h.parallel_for(
117+
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
118+
auto offset = id[0] * SIMDSize;
119+
auto offsets = simd<uint32_t, SIMDSize>(id * SIMDSize * sizeof(int),
120+
sizeof(int));
121+
auto pred_enable = simd_mask<1>(1);
122+
auto pred_disable = simd_mask<1>(0);
123+
124+
auto data_0 =
125+
lsc_block_load<int, SIMDSize>(vec_0 + offset, pred_enable);
126+
lsc_block_store<int, SIMDSize>(vec_0 + offset, data_0 * 2,
127+
pred_enable);
128+
129+
auto data_1 =
130+
lsc_block_load<int, SIMDSize>(vec_1 + offset, pred_disable);
131+
lsc_block_store<int, SIMDSize>(vec_1 + offset, data_1 * 2,
132+
pred_enable);
133+
134+
auto data_2 =
135+
lsc_block_load<int, SIMDSize>(vec_2 + offset, pred_enable);
136+
lsc_block_store<int, SIMDSize>(vec_2 + offset, data_2 * 2,
137+
pred_disable);
138+
auto data_3 =
139+
lsc_block_load<int, SIMDSize>(vec_3 + offset, pred_disable);
140+
lsc_block_store<int, SIMDSize>(vec_3 + offset, data_3 * 2,
141+
pred_disable);
142+
});
143+
});
144+
q.wait();
145+
} catch (sycl::exception e) {
146+
std::cout << "SYCL exception caught: " << e.what();
147+
sycl::free(vec_0, q);
148+
sycl::free(vec_1, q);
149+
sycl::free(vec_2, q);
150+
sycl::free(vec_3, q);
151+
return 1;
152+
}
153+
154+
int error = 0;
155+
for (auto i = 0; i != size; ++i) {
156+
error += vec_0[i] != 2 * i;
157+
error += vec_1[i] > 0;
158+
error += vec_2[i] != i;
159+
error += vec_3[i] != i;
160+
}
161+
sycl::free(vec_0, q);
162+
sycl::free(vec_1, q);
163+
sycl::free(vec_2, q);
164+
sycl::free(vec_3, q);
165+
std::cout << "USM lsc predicate test ";
166+
std::cout << (error != 0 ? "FAILED" : "passed") << std::endl;
167+
return error;
168+
}
169+
170+
int main() {
171+
172+
auto q =
173+
queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()};
174+
auto device = q.get_device();
175+
std::cout << "Device name: " << device.get_info<info::device::name>()
176+
<< std::endl;
177+
178+
int error = testUSM<8>(q);
179+
error = testUSM<16>(q);
180+
error = testUSM<32>(q);
181+
182+
error += testAccessor<8>(q);
183+
error += testAccessor<16>(q);
184+
error += testAccessor<32>(q);
185+
return error;
186+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//==------------ lsc_predicate_stateless.cpp - DPC++ ESIMD on-device test -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu-intel-pvc || esimd_emulator
9+
// RUN: %clangxx -fsycl -fsycl-esimd-force-stateless-mem %s -o %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
12+
// The test checks functionality of the lsc_block_load, lsc_block_store
13+
// accessor - based ESIMD intrinsics when stateless memory accesses are
14+
// enforced, i.e. accessor based accesses are automatically converted to
15+
// stateless accesses with newly introduced predicate parameter.
16+
17+
#include "../esimd_test_utils.hpp"
18+
19+
#include <algorithm>
20+
#include <cmath>
21+
#include <numeric>
22+
#include <sycl/ext/intel/esimd.hpp>
23+
#include <sycl/sycl.hpp>
24+
25+
using namespace sycl;
26+
using namespace sycl::ext::intel::esimd;
27+
using namespace sycl::ext::intel::experimental::esimd;
28+
29+
template <unsigned SIMDSize> int testAccessor(queue q) {
30+
auto size = size_t{128};
31+
32+
auto vec_0 = std::vector<int>(size);
33+
auto vec_1 = std::vector<int>(size);
34+
auto vec_2 = std::vector<int>(size);
35+
auto vec_3 = std::vector<int>(size);
36+
37+
std::iota(vec_0.begin(), vec_0.end(), 0);
38+
std::iota(vec_1.begin(), vec_1.end(), 0);
39+
std::iota(vec_2.begin(), vec_2.end(), 0);
40+
std::iota(vec_3.begin(), vec_3.end(), 0);
41+
auto buf_0 = buffer{vec_0};
42+
auto buf_1 = buffer{vec_1};
43+
auto buf_2 = buffer{vec_2};
44+
auto buf_3 = buffer{vec_3};
45+
46+
try {
47+
q.submit([&](handler &h) {
48+
auto access_0 = buf_0.template get_access<access::mode::read_write>(h);
49+
auto access_1 = buf_1.template get_access<access::mode::read_write>(h);
50+
auto access_2 = buf_2.template get_access<access::mode::read_write>(h);
51+
auto access_3 = buf_3.template get_access<access::mode::read_write>(h);
52+
53+
h.parallel_for(
54+
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
55+
auto offset = id[0] * SIMDSize * sizeof(int);
56+
auto offsets = simd<uint32_t, SIMDSize>(id * SIMDSize * sizeof(int),
57+
sizeof(int));
58+
auto pred_enable = simd_mask<1>(1);
59+
auto pred_disable = simd_mask<1>(0);
60+
61+
auto data_0 =
62+
lsc_block_load<int, SIMDSize>(access_0, offset, pred_enable);
63+
lsc_block_store<int, SIMDSize>(access_0, offset, data_0 * 2,
64+
pred_enable);
65+
66+
auto data_1 =
67+
lsc_block_load<int, SIMDSize>(access_1, offset, pred_disable);
68+
lsc_block_store<int, SIMDSize>(access_1, offset, data_1 * 2,
69+
pred_enable);
70+
71+
auto data_2 =
72+
lsc_block_load<int, SIMDSize>(access_2, offset, pred_enable);
73+
lsc_block_store<int, SIMDSize>(access_2, offset, data_2 * 2,
74+
pred_disable);
75+
76+
auto data_3 =
77+
lsc_block_load<int, SIMDSize>(access_3, offset, pred_disable);
78+
lsc_block_store<int, SIMDSize>(access_3, offset, data_3 * 2,
79+
pred_disable);
80+
});
81+
});
82+
q.wait();
83+
buf_0.template get_access<access::mode::read_write>();
84+
buf_1.template get_access<access::mode::read_write>();
85+
buf_2.template get_access<access::mode::read_write>();
86+
buf_3.template get_access<access::mode::read_write>();
87+
} catch (sycl::exception e) {
88+
std::cout << "SYCL exception caught: " << e.what();
89+
return 1;
90+
}
91+
92+
auto error = 0;
93+
for (auto i = 0; i != size; ++i) {
94+
error += vec_0[i] != 2 * i;
95+
error += vec_1[i] > 0;
96+
error += vec_2[i] != i;
97+
error += vec_3[i] != i;
98+
}
99+
std::cout << "Accessor lsc predicate test ";
100+
std::cout << (error != 0 ? "FAILED" : "passed") << std::endl;
101+
return error;
102+
}
103+
104+
int main() {
105+
106+
auto q =
107+
queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()};
108+
auto device = q.get_device();
109+
std::cout << "Device name: " << device.get_info<info::device::name>()
110+
<< std::endl;
111+
112+
int error = testAccessor<8>(q);
113+
error += testAccessor<16>(q);
114+
error += testAccessor<32>(q);
115+
return error;
116+
}

0 commit comments

Comments
 (0)