Skip to content

Commit 539972d

Browse files
authored
[SYCL][NFC] Update sub_group test for native subgroups mode (#1973)
Previously, the execution mode of CPU subgroup is emulation mode, which means a subgroup == a workgroup. Since OCL CPU RT has enabled native subgroups by default, we need to update these sycl subgroup tests.
1 parent d9bad0b commit 539972d

File tree

7 files changed

+41
-54
lines changed

7 files changed

+41
-54
lines changed

sycl/test/sub_group/common.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
3232
try {
3333
nd_range<1> NdRange(G, L);
3434
buffer<struct Data, 1> syclbuf(G);
35-
35+
buffer<size_t> sgsizebuf(1);
3636
Queue.submit([&](handler &cgh) {
37+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
3738
auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
3839
cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
3940
intel::sub_group SG = NdItem.get_sub_group();
@@ -46,27 +47,22 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
4647
syclacc[NdItem.get_global_id()].group_range = SG.get_group_range();
4748
syclacc[NdItem.get_global_id()].uniform_group_range =
4849
SG.get_uniform_group_range();
50+
if (NdItem.get_global_id(0) == 0)
51+
sgsizeacc[0] = SG.get_max_local_range()[0];
4952
});
5053
});
5154
auto syclacc = syclbuf.get_access<access::mode::read_write>();
52-
unsigned int max_sg = get_sg_size(Queue.get_device());
53-
unsigned int num_sg = L / max_sg + (L % max_sg ? 1 : 0);
55+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
56+
unsigned int sg_size = sgsizeacc[0];
57+
unsigned int num_sg = L / sg_size + (L % sg_size ? 1 : 0);
5458
for (int j = 0; j < G; j++) {
55-
unsigned int group_id = j % L / max_sg;
59+
unsigned int group_id = j % L / sg_size;
5660
unsigned int local_range =
57-
(group_id + 1 == num_sg) ? (L - group_id * max_sg) : max_sg;
58-
exit_if_not_equal(syclacc[j].local_id, j % L % max_sg, "local_id");
61+
(group_id + 1 == num_sg) ? (L - group_id * sg_size) : sg_size;
62+
exit_if_not_equal(syclacc[j].local_id, j % L % sg_size, "local_id");
5963
exit_if_not_equal(syclacc[j].local_range, local_range, "local_range");
60-
// TODO: Currently workgroup size affects this paramater on CPU and does
61-
// not on GPU. Remove if when it is aligned.
62-
if (Queue.get_device().get_info<info::device::device_type>() ==
63-
info::device_type::cpu) {
64-
exit_if_not_equal(syclacc[j].max_local_range, std::min(max_sg, L),
65-
"max_local_range");
66-
} else {
67-
exit_if_not_equal(syclacc[j].max_local_range, max_sg,
68-
"max_local_range");
69-
}
64+
exit_if_not_equal(syclacc[j].max_local_range,
65+
syclacc[0].max_local_range, "max_local_range");
7066
exit_if_not_equal(syclacc[j].group_id, group_id, "group_id");
7167
exit_if_not_equal(syclacc[j].group_range, num_sg, "group_range");
7268
exit_if_not_equal(syclacc[j].uniform_group_range, num_sg,

sycl/test/sub_group/helper.hpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -133,23 +133,6 @@ void exit_if_not_equal_vec(vec<T, N> val, vec<T, N> ref, const char *name) {
133133
}
134134
}
135135

136-
/* CPU returns max number of SG, GPU returns max SG size for
137-
* CL_DEVICE_MAX_NUM_SUB_GROUPS device parameter. This function aligns the
138-
* value.
139-
* */
140-
inline size_t get_sg_size(const device &Device) {
141-
size_t max_num_sg = Device.get_info<info::device::max_num_sub_groups>();
142-
if (Device.get_info<info::device::device_type>() == info::device_type::cpu) {
143-
size_t max_wg_size = Device.get_info<info::device::max_work_group_size>();
144-
return max_wg_size / max_num_sg;
145-
}
146-
if (Device.get_info<info::device::device_type>() == info::device_type::gpu) {
147-
return max_num_sg;
148-
}
149-
std::cout << "Unexpected deive type" << std::endl;
150-
exit(1);
151-
}
152-
153136
bool core_sg_supported(const device &Device) {
154137
return (Device.has_extension("cl_khr_subgroups") ||
155138
Device.get_info<info::device::version>().find(" 2.1") !=

sycl/test/sub_group/info.cpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,8 @@ int main() {
3232
/* Check info::device parameters. */
3333
Device.get_info<info::device::sub_group_independent_forward_progress>();
3434
Device.get_info<info::device::max_num_sub_groups>();
35-
/* sub_group_sizes can be quared only of cl_intel_required_subgroup_size
36-
* extention is supported by device*/
37-
if (Device.has_extension("cl_intel_required_subgroup_size"))
38-
Device.get_info<info::device::sub_group_sizes>();
3935

4036
try {
41-
size_t max_sg_num = get_sg_size(Device);
4237
size_t max_wg_size = Device.get_info<info::device::max_work_group_size>();
4338
program Prog(Queue.get_context());
4439
/* TODO: replace with pure SYCL code when fixed problem with consumption
@@ -56,13 +51,19 @@ int main() {
5651
"global double* c) {*a=*b+*c; }\n");
5752
kernel Kernel = Prog.get_kernel("kernel_sg");
5853
uint32_t Res = 0;
59-
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
60-
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
61-
Res = Kernel.get_sub_group_info<
62-
info::kernel_sub_group::max_sub_group_size>(Device, r);
63-
bool Expected = (Res == r.size() || Res == max_sg_num);
64-
exit_if_not_equal<bool>(Expected, true,
65-
"max_sub_group_size");
54+
55+
/* sub_group_sizes can be quared only of cl_intel_required_subgroup_size
56+
* extention is supported by device*/
57+
if (Device.has_extension("cl_intel_required_subgroup_size")) {
58+
auto sg_sizes = Device.get_info<info::device::sub_group_sizes>();
59+
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
60+
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
61+
Res = Kernel.get_sub_group_info<
62+
info::kernel_sub_group::max_sub_group_size>(Device, r);
63+
bool Expected =
64+
std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end();
65+
exit_if_not_equal<bool>(Expected, true, "max_sub_group_size");
66+
}
6667
}
6768

6869
Res = Kernel.get_sub_group_info<

sycl/test/sub_group/load_store.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ template <typename T, int N> class sycl_subgr;
2222
using namespace cl::sycl;
2323

2424
template <typename T, int N> void check(queue &Queue) {
25-
const int G = 1024, L = 64;
25+
const int G = 1024, L = 128;
2626
try {
2727
nd_range<1> NdRange(G, L);
2828
buffer<T> syclbuf(G);

sycl/test/sub_group/reduce.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
3030
try {
3131
nd_range<1> NdRange(G, L);
3232
buffer<T> buf(G);
33+
buffer<size_t> sgsizebuf(1);
3334
Queue.submit([&](handler &cgh) {
35+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
3436
auto acc = buf.template get_access<access::mode::read_write>(cgh);
3537
cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
3638
NdRange, [=](nd_item<1> NdItem) {
@@ -42,10 +44,13 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
4244
acc[NdItem.get_global_id(0)] =
4345
reduce(sg, T(NdItem.get_global_id(0)), init, op);
4446
}
47+
if (NdItem.get_global_id(0) == 0)
48+
sgsizeacc[0] = sg.get_max_local_range()[0];
4549
});
4650
});
4751
auto acc = buf.template get_access<access::mode::read_write>();
48-
size_t sg_size = get_sg_size(Queue.get_device());
52+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
53+
size_t sg_size = sgsizeacc[0];
4954
int WGid = -1, SGid = 0;
5055
T result = init;
5156
for (int j = 0; j < G; j++) {

sycl/test/sub_group/scan.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
3131
try {
3232
nd_range<1> NdRange(G, L);
3333
buffer<T> exbuf(G), inbuf(G);
34+
buffer<size_t> sgsizebuf(1);
3435
Queue.submit([&](handler &cgh) {
36+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
3537
auto exacc = exbuf.template get_access<access::mode::read_write>(cgh);
3638
auto inacc = inbuf.template get_access<access::mode::read_write>(cgh);
3739
cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
@@ -48,11 +50,14 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
4850
inacc[NdItem.get_global_id(0)] =
4951
inclusive_scan(sg, T(NdItem.get_global_id(0)), op, init);
5052
}
53+
if (NdItem.get_global_id(0) == 0)
54+
sgsizeacc[0] = sg.get_max_local_range()[0];
5155
});
5256
});
5357
auto exacc = exbuf.template get_access<access::mode::read_write>();
5458
auto inacc = inbuf.template get_access<access::mode::read_write>();
55-
size_t sg_size = get_sg_size(Queue.get_device());
59+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
60+
size_t sg_size = sgsizeacc[0];
5661
int WGid = -1, SGid = 0;
5762
T result = init;
5863
for (int j = 0; j < G; j++) {

sycl/test/sub_group/vote.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,6 @@ using namespace cl::sycl;
2121

2222
void check(queue Queue, const int G, const int L, const int D, const int R) {
2323
try {
24-
int max_sg =
25-
Queue.get_device().get_info<info::device::max_num_sub_groups>();
26-
int num_sg = (L) / max_sg + ((L) % max_sg ? 1 : 0);
2724
range<1> GRange(G), LRange(L);
2825
nd_range<1> NdRange(GRange, LRange);
2926
buffer<int, 1> sganybuf(G);
@@ -82,8 +79,8 @@ int main() {
8279
std::cout << "Skipping test\n";
8380
return 0;
8481
}
85-
check(Queue, 240, 80, 9, 8);
86-
check(Queue, 24, 12, 9, 10);
87-
check(Queue, 1024, 256, 9, 8);
82+
check(Queue, 240, 80, 3, 1);
83+
check(Queue, 24, 12, 3, 4);
84+
check(Queue, 1024, 256, 3, 1);
8885
std::cout << "Test passed." << std::endl;
8986
}

0 commit comments

Comments
 (0)