Skip to content

Commit f9e8ffc

Browse files
authored
[NFC][SYCL] Stabilize sub_group LIT tests (#2253)
- Fixed OCL_ICD_FILENAMES capturing to LIT environment. - Disable sub_group tests which fail on some ISAs on OpenCL CPU. - Change local and global workgroup sizes to make them dividend of all possible sub_group sizes. That will fix test failures in the last sub_group of local group. - Remove use of deprecated shuffle methods. - Fixed tests where sub_group local ID exceed sub_group size.
1 parent 852d05a commit f9e8ffc

File tree

11 files changed

+39
-97
lines changed

11 files changed

+39
-97
lines changed

sycl/test/lit.cfg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
config.test_exec_root = os.path.join(config.sycl_obj_root, 'test')
3737

3838
# Propagate some variables from the host environment.
39-
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAME', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])
39+
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAMES', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])
4040

4141
# Configure LD_LIBRARY_PATH or corresponding os-specific alternatives
4242
if platform.system() == "Linux":

sycl/test/sub_group/broadcast.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@
88

99
#include "helper.hpp"
1010
#include <CL/sycl.hpp>
11-
template <typename T>
12-
class sycl_subgr;
11+
template <typename T> class sycl_subgr;
1312
using namespace cl::sycl;
14-
template <typename T>
15-
void check(queue &Queue) {
16-
const int G = 240, L = 60;
13+
template <typename T> void check(queue &Queue) {
14+
const int G = 256, L = 64;
1715
try {
1816
nd_range<1> NdRange(G, L);
1917
buffer<T> syclbuf(G);
@@ -23,9 +21,10 @@ void check(queue &Queue) {
2321
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
2422
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
2523
intel::sub_group SG = NdItem.get_sub_group();
26-
/*Broadcast GID of element with SGLID == SGID */
24+
/*Broadcast GID of element with SGLID == SGID % SGMLR*/
2725
syclacc[NdItem.get_global_id()] =
28-
broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
26+
broadcast(SG, T(NdItem.get_global_id(0)),
27+
SG.get_group_id() % SG.get_max_local_range()[0]);
2928
if (NdItem.get_global_id(0) == 0)
3029
sgsizeacc[0] = SG.get_max_local_range()[0];
3130
});
@@ -44,7 +43,8 @@ void check(queue &Queue) {
4443
WGid++;
4544
SGid = 0;
4645
}
47-
exit_if_not_equal<T>(syclacc[j], L * WGid + SGid + SGid * sg_size,
46+
exit_if_not_equal<T>(syclacc[j],
47+
L * WGid + SGid % sg_size + SGid * sg_size,
4848
"broadcasted value");
4949
}
5050
} catch (exception e) {

sycl/test/sub_group/generic-shuffle.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// UNSUPPORTED: cuda || cpu
1+
// UNSUPPORTED: cuda
22
// CUDA compilation and runtime do not yet support sub-groups.
3-
// #2245 failed on OpenCL CPU (2020.10.7.0.15) with avx2 instruction set
43
//
54
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
65
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
@@ -24,7 +23,7 @@ template <typename T> class pointer_kernel;
2423
using namespace cl::sycl;
2524

2625
template <typename T>
27-
void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
26+
void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) {
2827
try {
2928
nd_range<1> NdRange(G, L);
3029
buffer<T *> buf(G);
@@ -118,7 +117,7 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
118117
}
119118

120119
template <typename T, typename Generator>
121-
void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
120+
void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) {
122121

123122
// Fill a vector with values that will be shuffled
124123
std::vector<T> values(G);

sycl/test/sub_group/load_store.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/reduce.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/reduce.hpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99
#include "helper.hpp"
1010
#include <CL/sycl.hpp>
1111

12-
template <typename T, class BinaryOperation>
13-
class sycl_subgr;
12+
template <typename T, class BinaryOperation> class sycl_subgr;
1413

1514
using namespace cl::sycl;
1615

1716
template <typename T, class BinaryOperation>
1817
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
19-
size_t G = 240, size_t L = 60) {
18+
size_t G = 256, size_t L = 64) {
2019
try {
2120
nd_range<1> NdRange(G, L);
2221
buffer<T> buf(G);
@@ -65,8 +64,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
6564
}
6665
}
6766

68-
template <typename T>
69-
void check(queue &Queue, size_t G = 240, size_t L = 60) {
67+
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
7068
// limit data range for half to avoid rounding issues
7169
if (std::is_same<T, cl::sycl::half>::value) {
7270
G = 64;

sycl/test/sub_group/reduce_fp64.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/scan.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/scan.hpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010
#include <CL/sycl.hpp>
1111
#include <limits>
1212

13-
template <typename T, class BinaryOperation>
14-
class sycl_subgr;
13+
template <typename T, class BinaryOperation> class sycl_subgr;
1514

1615
using namespace cl::sycl;
1716

1817
template <typename T, class BinaryOperation>
1918
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
20-
size_t G = 120, size_t L = 60) {
19+
size_t G = 256, size_t L = 64) {
2120
try {
2221
nd_range<1> NdRange(G, L);
2322
buffer<T> exbuf(G), inbuf(G);
@@ -73,8 +72,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
7372
}
7473
}
7574

76-
template <typename T>
77-
void check(queue &Queue, size_t G = 120, size_t L = 60) {
75+
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
7876
// limit data range for half to avoid rounding issues
7977
if (std::is_same<T, cl::sycl::half>::value) {
8078
G = 64;

sycl/test/sub_group/scan_fp64.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/shuffle.hpp

Lines changed: 6 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,15 @@ using namespace cl::sycl;
1717
struct wa_half;
1818

1919
template <typename T, int N>
20-
void check(queue &Queue, size_t G = 240, size_t L = 60) {
20+
void check(queue &Queue, size_t G = 256, size_t L = 64) {
2121
try {
2222
nd_range<1> NdRange(G, L);
23-
buffer<vec<T, N>> buf2(G);
24-
buffer<vec<T, N>> buf2_up(G);
25-
buffer<vec<T, N>> buf2_down(G);
2623
buffer<vec<T, N>> buf(G);
2724
buffer<vec<T, N>> buf_up(G);
2825
buffer<vec<T, N>> buf_down(G);
2926
buffer<vec<T, N>> buf_xor(G);
3027
buffer<size_t> sgsizebuf(1);
3128
Queue.submit([&](handler &cgh) {
32-
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
33-
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
34-
auto acc2_down =
35-
buf2_down.template get_access<access::mode::read_write>(cgh);
36-
3729
auto acc = buf.template get_access<access::mode::read_write>(cgh);
3830
auto acc_up = buf_up.template get_access<access::mode::read_write>(cgh);
3931
auto acc_down =
@@ -48,15 +40,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
4840
vec<T, N> vwggid(wggid), vsgid(sgid);
4941
if (wggid == 0)
5042
sgsizeacc[0] = SG.get_max_local_range()[0];
51-
/* 1 for odd subgroups and 2 for even*/
52-
acc2[NdItem.get_global_id()] =
53-
SG.shuffle(vec<T, N>(1), vec<T, N>(2),
54-
(sgid % 2) ? 1 : SG.get_max_local_range()[0]);
55-
/* GID-SGID */
56-
acc2_up[NdItem.get_global_id()] = SG.shuffle_up(vwggid, vwggid, sgid);
57-
/* GID-SGID or SGLID if GID+SGID > SGsize*/
58-
acc2_down[NdItem.get_global_id()] =
59-
SG.shuffle_down(vwggid, vec<T, N>(SG.get_local_id().get(0)), sgid);
6043

6144
/*GID of middle element in every subgroup*/
6245
acc[NdItem.get_global_id()] =
@@ -73,9 +56,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
7356
auto acc = buf.template get_access<access::mode::read_write>();
7457
auto acc_up = buf_up.template get_access<access::mode::read_write>();
7558
auto acc_down = buf_down.template get_access<access::mode::read_write>();
76-
auto acc2 = buf2.template get_access<access::mode::read_write>();
77-
auto acc2_up = buf2_up.template get_access<access::mode::read_write>();
78-
auto acc2_down = buf2_down.template get_access<access::mode::read_write>();
7959
auto acc_xor = buf_xor.template get_access<access::mode::read_write>();
8060
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
8161

@@ -98,28 +78,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
9878
exit_if_not_equal_vec<T, N>(
9979
acc[j], vec<T, N>(j / L * L + SGid * sg_size + sg_size / 2),
10080
"shuffle");
101-
/* 1 for odd subgroups and 2 for even*/
102-
exit_if_not_equal_vec<T, N>(acc2[j], vec<T, N>((SGid % 2) ? 1 : 2),
103-
"shuffle2");
10481
/* Value GID+SGID for all element except last SGID in SG*/
10582
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
106-
exit_if_not_equal_vec(acc_down[j], vec<T, N>(j + SGid), "shuffle_down");
107-
exit_if_not_equal_vec(acc2_down[j], vec<T, N>(j + SGid),
108-
"shuffle2_down");
109-
} else { /* SGLID for GID+SGid */
110-
if (j % L + SGid < L) /* Do not go out LG*/
111-
exit_if_not_equal_vec<T, N>(acc2_down[j],
112-
vec<T, N>((j + SGid) % L % sg_size),
113-
"shuffle2_down");
83+
exit_if_not_equal_vec(acc_down[j], vec<T, N>(j + SGid % sg_size),
84+
"shuffle_down");
11485
}
11586
/* Value GID-SGID for all element except first SGID in SG*/
11687
if (j % L % sg_size >= SGid) {
117-
exit_if_not_equal_vec(acc_up[j], vec<T, N>(j - SGid), "shuffle_up");
118-
exit_if_not_equal_vec(acc2_up[j], vec<T, N>(j - SGid), "shuffle2_up");
119-
} else { /* SGLID for GID-SGid */
120-
if (j % L - SGid + sg_size < L) /* Do not go out LG*/
121-
exit_if_not_equal_vec(acc2_up[j], vec<T, N>(j - SGid + sg_size),
122-
"shuffle2_up");
88+
exit_if_not_equal_vec(acc_up[j], vec<T, N>(j - SGid % sg_size),
89+
"shuffle_up");
12390
}
12491
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
12592
exit_if_not_equal_vec(acc_xor[j],
@@ -133,23 +100,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
133100
}
134101
}
135102

136-
template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
103+
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
137104
try {
138105
nd_range<1> NdRange(G, L);
139-
buffer<T> buf2(G);
140-
buffer<T> buf2_up(G);
141-
buffer<T> buf2_down(G);
142106
buffer<T> buf(G);
143107
buffer<T> buf_up(G);
144108
buffer<T> buf_down(G);
145109
buffer<T> buf_xor(G);
146110
buffer<size_t> sgsizebuf(1);
147111
Queue.submit([&](handler &cgh) {
148-
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
149-
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
150-
auto acc2_down =
151-
buf2_down.template get_access<access::mode::read_write>(cgh);
152-
153112
auto acc = buf.template get_access<access::mode::read_write>(cgh);
154113
auto acc_up = buf_up.template get_access<access::mode::read_write>(cgh);
155114
auto acc_down =
@@ -163,14 +122,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
163122
uint32_t sgid = SG.get_group_id().get(0);
164123
if (wggid == 0)
165124
sgsizeacc[0] = SG.get_max_local_range()[0];
166-
/* 1 for odd subgroups and 2 for even*/
167-
acc2[NdItem.get_global_id()] =
168-
SG.shuffle<T>(1, 2, (sgid % 2) ? 1 : SG.get_max_local_range()[0]);
169-
/* GID-SGID */
170-
acc2_up[NdItem.get_global_id()] = SG.shuffle_up<T>(wggid, wggid, sgid);
171-
/* GID-SGID or SGLID if GID+SGID > SGsize*/
172-
acc2_down[NdItem.get_global_id()] =
173-
SG.shuffle_down<T>(wggid, SG.get_local_id().get(0), sgid);
174125

175126
/*GID of middle element in every subgroup*/
176127
acc[NdItem.get_global_id()] =
@@ -187,9 +138,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
187138
auto acc = buf.template get_access<access::mode::read_write>();
188139
auto acc_up = buf_up.template get_access<access::mode::read_write>();
189140
auto acc_down = buf_down.template get_access<access::mode::read_write>();
190-
auto acc2 = buf2.template get_access<access::mode::read_write>();
191-
auto acc2_up = buf2_up.template get_access<access::mode::read_write>();
192-
auto acc2_down = buf2_down.template get_access<access::mode::read_write>();
193141
auto acc_xor = buf_xor.template get_access<access::mode::read_write>();
194142
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
195143

@@ -212,24 +160,13 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
212160
/*GID of middle element in every subgroup*/
213161
exit_if_not_equal<T>(acc[j], j / L * L + SGid * sg_size + sg_size / 2,
214162
"shuffle");
215-
/* 1 for odd subgroups and 2 for even*/
216-
exit_if_not_equal<T>(acc2[j], (SGid % 2) ? 1 : 2, "shuffle2");
217163
/* Value GID+SGID for all element except last SGID in SG*/
218164
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
219165
exit_if_not_equal<T>(acc_down[j], j + SGid, "shuffle_down");
220-
exit_if_not_equal<T>(acc2_down[j], j + SGid, "shuffle2_down");
221-
} else { /* SGLID for GID+SGid */
222-
if (j % L + SGid < L) /* Do not go out LG*/
223-
exit_if_not_equal<T>(acc2_down[j], (j + SGid) % L % sg_size,
224-
"shuffle2_down");
225166
}
226167
/* Value GID-SGID for all element except first SGID in SG*/
227168
if (j % L % sg_size >= SGid) {
228169
exit_if_not_equal<T>(acc_up[j], j - SGid, "shuffle_up");
229-
exit_if_not_equal<T>(acc2_up[j], j - SGid, "shuffle2_up");
230-
} else { /* SGLID for GID-SGid */
231-
if (j % L - SGid + sg_size < L) /* Do not go out LG*/
232-
exit_if_not_equal<T>(acc2_up[j], j - SGid + sg_size, "shuffle2_up");
233170
}
234171
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
235172
exit_if_not_equal<T>(acc_xor[j], SGBeginGid + (SGLid ^ (SGid % sg_size)),

0 commit comments

Comments
 (0)