Skip to content

[SYCL][NFC] Update sub_group test for native subgroups mode #1973

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions sycl/test/sub_group/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
try {
nd_range<1> NdRange(G, L);
buffer<struct Data, 1> syclbuf(G);

buffer<size_t> sgsizebuf(1);
Queue.submit([&](handler &cgh) {
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
Expand All @@ -46,27 +47,22 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
syclacc[NdItem.get_global_id()].group_range = SG.get_group_range();
syclacc[NdItem.get_global_id()].uniform_group_range =
SG.get_uniform_group_range();
if (NdItem.get_global_id(0) == 0)
sgsizeacc[0] = SG.get_max_local_range()[0];
});
});
auto syclacc = syclbuf.get_access<access::mode::read_write>();
unsigned int max_sg = get_sg_size(Queue.get_device());
unsigned int num_sg = L / max_sg + (L % max_sg ? 1 : 0);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
unsigned int sg_size = sgsizeacc[0];
unsigned int num_sg = L / sg_size + (L % sg_size ? 1 : 0);
for (int j = 0; j < G; j++) {
unsigned int group_id = j % L / max_sg;
unsigned int group_id = j % L / sg_size;
unsigned int local_range =
(group_id + 1 == num_sg) ? (L - group_id * max_sg) : max_sg;
exit_if_not_equal(syclacc[j].local_id, j % L % max_sg, "local_id");
(group_id + 1 == num_sg) ? (L - group_id * sg_size) : sg_size;
exit_if_not_equal(syclacc[j].local_id, j % L % sg_size, "local_id");
exit_if_not_equal(syclacc[j].local_range, local_range, "local_range");
// TODO: Currently workgroup size affects this paramater on CPU and does
// not on GPU. Remove if when it is aligned.
if (Queue.get_device().get_info<info::device::device_type>() ==
info::device_type::cpu) {
exit_if_not_equal(syclacc[j].max_local_range, std::min(max_sg, L),
"max_local_range");
} else {
exit_if_not_equal(syclacc[j].max_local_range, max_sg,
"max_local_range");
}
exit_if_not_equal(syclacc[j].max_local_range,
syclacc[0].max_local_range, "max_local_range");
exit_if_not_equal(syclacc[j].group_id, group_id, "group_id");
exit_if_not_equal(syclacc[j].group_range, num_sg, "group_range");
exit_if_not_equal(syclacc[j].uniform_group_range, num_sg,
Expand Down
17 changes: 0 additions & 17 deletions sycl/test/sub_group/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,23 +133,6 @@ void exit_if_not_equal_vec(vec<T, N> val, vec<T, N> ref, const char *name) {
}
}

/* CPU returns max number of SG, GPU returns max SG size for
* CL_DEVICE_MAX_NUM_SUB_GROUPS device parameter. This function aligns the
* value.
* */
inline size_t get_sg_size(const device &Device) {
size_t max_num_sg = Device.get_info<info::device::max_num_sub_groups>();
if (Device.get_info<info::device::device_type>() == info::device_type::cpu) {
size_t max_wg_size = Device.get_info<info::device::max_work_group_size>();
return max_wg_size / max_num_sg;
}
if (Device.get_info<info::device::device_type>() == info::device_type::gpu) {
return max_num_sg;
}
std::cout << "Unexpected deive type" << std::endl;
exit(1);
}

bool core_sg_supported(const device &Device) {
return (Device.has_extension("cl_khr_subgroups") ||
Device.get_info<info::device::version>().find(" 2.1") !=
Expand Down
25 changes: 13 additions & 12 deletions sycl/test/sub_group/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,8 @@ int main() {
/* Check info::device parameters. */
Device.get_info<info::device::sub_group_independent_forward_progress>();
Device.get_info<info::device::max_num_sub_groups>();
/* sub_group_sizes can be quared only of cl_intel_required_subgroup_size
* extention is supported by device*/
if (Device.has_extension("cl_intel_required_subgroup_size"))
Device.get_info<info::device::sub_group_sizes>();

try {
size_t max_sg_num = get_sg_size(Device);
size_t max_wg_size = Device.get_info<info::device::max_work_group_size>();
program Prog(Queue.get_context());
/* TODO: replace with pure SYCL code when fixed problem with consumption
Expand All @@ -56,13 +51,19 @@ int main() {
"global double* c) {*a=*b+*c; }\n");
kernel Kernel = Prog.get_kernel("kernel_sg");
uint32_t Res = 0;
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
Res = Kernel.get_sub_group_info<
info::kernel_sub_group::max_sub_group_size>(Device, r);
bool Expected = (Res == r.size() || Res == max_sg_num);
exit_if_not_equal<bool>(Expected, true,
"max_sub_group_size");

/* sub_group_sizes can be quared only of cl_intel_required_subgroup_size
* extention is supported by device*/
if (Device.has_extension("cl_intel_required_subgroup_size")) {
auto sg_sizes = Device.get_info<info::device::sub_group_sizes>();
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
Res = Kernel.get_sub_group_info<
info::kernel_sub_group::max_sub_group_size>(Device, r);
bool Expected =
std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end();
exit_if_not_equal<bool>(Expected, true, "max_sub_group_size");
}
}

Res = Kernel.get_sub_group_info<
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/sub_group/load_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ template <typename T, int N> class sycl_subgr;
using namespace cl::sycl;

template <typename T, int N> void check(queue &Queue) {
const int G = 1024, L = 64;
const int G = 1024, L = 128;
try {
nd_range<1> NdRange(G, L);
buffer<T> syclbuf(G);
Expand Down
7 changes: 6 additions & 1 deletion sycl/test/sub_group/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
try {
nd_range<1> NdRange(G, L);
buffer<T> buf(G);
buffer<size_t> sgsizebuf(1);
Queue.submit([&](handler &cgh) {
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
auto acc = buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
NdRange, [=](nd_item<1> NdItem) {
Expand All @@ -42,10 +44,13 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
acc[NdItem.get_global_id(0)] =
reduce(sg, T(NdItem.get_global_id(0)), init, op);
}
if (NdItem.get_global_id(0) == 0)
sgsizeacc[0] = sg.get_max_local_range()[0];
});
});
auto acc = buf.template get_access<access::mode::read_write>();
size_t sg_size = get_sg_size(Queue.get_device());
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
size_t sg_size = sgsizeacc[0];
int WGid = -1, SGid = 0;
T result = init;
for (int j = 0; j < G; j++) {
Expand Down
7 changes: 6 additions & 1 deletion sycl/test/sub_group/scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
try {
nd_range<1> NdRange(G, L);
buffer<T> exbuf(G), inbuf(G);
buffer<size_t> sgsizebuf(1);
Queue.submit([&](handler &cgh) {
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
auto exacc = exbuf.template get_access<access::mode::read_write>(cgh);
auto inacc = inbuf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
Expand All @@ -48,11 +50,14 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
inacc[NdItem.get_global_id(0)] =
inclusive_scan(sg, T(NdItem.get_global_id(0)), op, init);
}
if (NdItem.get_global_id(0) == 0)
sgsizeacc[0] = sg.get_max_local_range()[0];
});
});
auto exacc = exbuf.template get_access<access::mode::read_write>();
auto inacc = inbuf.template get_access<access::mode::read_write>();
size_t sg_size = get_sg_size(Queue.get_device());
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
size_t sg_size = sgsizeacc[0];
int WGid = -1, SGid = 0;
T result = init;
for (int j = 0; j < G; j++) {
Expand Down
9 changes: 3 additions & 6 deletions sycl/test/sub_group/vote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ using namespace cl::sycl;

void check(queue Queue, const int G, const int L, const int D, const int R) {
try {
int max_sg =
Queue.get_device().get_info<info::device::max_num_sub_groups>();
int num_sg = (L) / max_sg + ((L) % max_sg ? 1 : 0);
range<1> GRange(G), LRange(L);
nd_range<1> NdRange(GRange, LRange);
buffer<int, 1> sganybuf(G);
Expand Down Expand Up @@ -82,8 +79,8 @@ int main() {
std::cout << "Skipping test\n";
return 0;
}
check(Queue, 240, 80, 9, 8);
check(Queue, 24, 12, 9, 10);
check(Queue, 1024, 256, 9, 8);
check(Queue, 240, 80, 3, 1);
check(Queue, 24, 12, 3, 4);
check(Queue, 1024, 256, 3, 1);
std::cout << "Test passed." << std::endl;
}