Skip to content

[NFC][SYCL] Stabilize sub_group LIT tests #2253

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
config.test_exec_root = os.path.join(config.sycl_obj_root, 'test')

# Propagate some variables from the host environment.
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAME', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAMES', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])

# Configure LD_LIBRARY_PATH or corresponding os-specific alternatives
if platform.system() == "Linux":
Expand Down
16 changes: 8 additions & 8 deletions sycl/test/sub_group/broadcast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@

#include "helper.hpp"
#include <CL/sycl.hpp>
template <typename T>
class sycl_subgr;
template <typename T> class sycl_subgr;
using namespace cl::sycl;
template <typename T>
void check(queue &Queue) {
const int G = 240, L = 60;
template <typename T> void check(queue &Queue) {
const int G = 256, L = 64;
try {
nd_range<1> NdRange(G, L);
buffer<T> syclbuf(G);
Expand All @@ -23,9 +21,10 @@ void check(queue &Queue) {
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
/*Broadcast GID of element with SGLID == SGID */
/*Broadcast GID of element with SGLID == SGID % SGMLR*/
syclacc[NdItem.get_global_id()] =
broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
broadcast(SG, T(NdItem.get_global_id(0)),
SG.get_group_id() % SG.get_max_local_range()[0]);
if (NdItem.get_global_id(0) == 0)
sgsizeacc[0] = SG.get_max_local_range()[0];
});
Expand All @@ -44,7 +43,8 @@ void check(queue &Queue) {
WGid++;
SGid = 0;
}
exit_if_not_equal<T>(syclacc[j], L * WGid + SGid + SGid * sg_size,
exit_if_not_equal<T>(syclacc[j],
L * WGid + SGid % sg_size + SGid * sg_size,
"broadcasted value");
}
} catch (exception e) {
Expand Down
7 changes: 3 additions & 4 deletions sycl/test/sub_group/generic-shuffle.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// UNSUPPORTED: cuda || cpu
// UNSUPPORTED: cuda
// CUDA compilation and runtime do not yet support sub-groups.
// #2245 failed on OpenCL CPU (2020.10.7.0.15) with avx2 instruction set
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand All @@ -24,7 +23,7 @@ template <typename T> class pointer_kernel;
using namespace cl::sycl;

template <typename T>
void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) {
try {
nd_range<1> NdRange(G, L);
buffer<T *> buf(G);
Expand Down Expand Up @@ -118,7 +117,7 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
}

template <typename T, typename Generator>
void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) {

// Fill a vector with values that will be shuffled
std::vector<T> values(G);
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/sub_group/load_store.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: cuda
// UNSUPPORTED: cuda || cpu
// CUDA compilation and runtime do not yet support sub-groups.
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/sub_group/reduce.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: cuda
// UNSUPPORTED: cuda || cpu
// CUDA compilation and runtime do not yet support sub-groups.
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand Down
8 changes: 3 additions & 5 deletions sycl/test/sub_group/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
#include "helper.hpp"
#include <CL/sycl.hpp>

template <typename T, class BinaryOperation>
class sycl_subgr;
template <typename T, class BinaryOperation> class sycl_subgr;

using namespace cl::sycl;

template <typename T, class BinaryOperation>
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
size_t G = 240, size_t L = 60) {
size_t G = 256, size_t L = 64) {
try {
nd_range<1> NdRange(G, L);
buffer<T> buf(G);
Expand Down Expand Up @@ -65,8 +64,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
}
}

template <typename T>
void check(queue &Queue, size_t G = 240, size_t L = 60) {
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
// limit data range for half to avoid rounding issues
if (std::is_same<T, cl::sycl::half>::value) {
G = 64;
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/sub_group/reduce_fp64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: cuda
// UNSUPPORTED: cuda || cpu
// CUDA compilation and runtime do not yet support sub-groups.
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/sub_group/scan.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: cuda
// UNSUPPORTED: cuda || cpu
// CUDA compilation and runtime do not yet support sub-groups.
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand Down
8 changes: 3 additions & 5 deletions sycl/test/sub_group/scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@
#include <CL/sycl.hpp>
#include <limits>

template <typename T, class BinaryOperation>
class sycl_subgr;
template <typename T, class BinaryOperation> class sycl_subgr;

using namespace cl::sycl;

template <typename T, class BinaryOperation>
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
size_t G = 120, size_t L = 60) {
size_t G = 256, size_t L = 64) {
try {
nd_range<1> NdRange(G, L);
buffer<T> exbuf(G), inbuf(G);
Expand Down Expand Up @@ -73,8 +72,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
}
}

template <typename T>
void check(queue &Queue, size_t G = 120, size_t L = 60) {
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
// limit data range for half to avoid rounding issues
if (std::is_same<T, cl::sycl::half>::value) {
G = 64;
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/sub_group/scan_fp64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// UNSUPPORTED: cuda
// UNSUPPORTED: cuda || cpu
// CUDA compilation and runtime do not yet support sub-groups.
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
Expand Down
75 changes: 6 additions & 69 deletions sycl/test/sub_group/shuffle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,15 @@ using namespace cl::sycl;
struct wa_half;

template <typename T, int N>
void check(queue &Queue, size_t G = 240, size_t L = 60) {
void check(queue &Queue, size_t G = 256, size_t L = 64) {
try {
nd_range<1> NdRange(G, L);
buffer<vec<T, N>> buf2(G);
buffer<vec<T, N>> buf2_up(G);
buffer<vec<T, N>> buf2_down(G);
buffer<vec<T, N>> buf(G);
buffer<vec<T, N>> buf_up(G);
buffer<vec<T, N>> buf_down(G);
buffer<vec<T, N>> buf_xor(G);
buffer<size_t> sgsizebuf(1);
Queue.submit([&](handler &cgh) {
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
auto acc2_down =
buf2_down.template get_access<access::mode::read_write>(cgh);

auto acc = buf.template get_access<access::mode::read_write>(cgh);
auto acc_up = buf_up.template get_access<access::mode::read_write>(cgh);
auto acc_down =
Expand All @@ -48,15 +40,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
vec<T, N> vwggid(wggid), vsgid(sgid);
if (wggid == 0)
sgsizeacc[0] = SG.get_max_local_range()[0];
/* 1 for odd subgroups and 2 for even*/
acc2[NdItem.get_global_id()] =
SG.shuffle(vec<T, N>(1), vec<T, N>(2),
(sgid % 2) ? 1 : SG.get_max_local_range()[0]);
/* GID-SGID */
acc2_up[NdItem.get_global_id()] = SG.shuffle_up(vwggid, vwggid, sgid);
/* GID-SGID or SGLID if GID+SGID > SGsize*/
acc2_down[NdItem.get_global_id()] =
SG.shuffle_down(vwggid, vec<T, N>(SG.get_local_id().get(0)), sgid);

/*GID of middle element in every subgroup*/
acc[NdItem.get_global_id()] =
Expand All @@ -73,9 +56,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
auto acc = buf.template get_access<access::mode::read_write>();
auto acc_up = buf_up.template get_access<access::mode::read_write>();
auto acc_down = buf_down.template get_access<access::mode::read_write>();
auto acc2 = buf2.template get_access<access::mode::read_write>();
auto acc2_up = buf2_up.template get_access<access::mode::read_write>();
auto acc2_down = buf2_down.template get_access<access::mode::read_write>();
auto acc_xor = buf_xor.template get_access<access::mode::read_write>();
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();

Expand All @@ -98,28 +78,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
exit_if_not_equal_vec<T, N>(
acc[j], vec<T, N>(j / L * L + SGid * sg_size + sg_size / 2),
"shuffle");
/* 1 for odd subgroups and 2 for even*/
exit_if_not_equal_vec<T, N>(acc2[j], vec<T, N>((SGid % 2) ? 1 : 2),
"shuffle2");
/* Value GID+SGID for all element except last SGID in SG*/
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
exit_if_not_equal_vec(acc_down[j], vec<T, N>(j + SGid), "shuffle_down");
exit_if_not_equal_vec(acc2_down[j], vec<T, N>(j + SGid),
"shuffle2_down");
} else { /* SGLID for GID+SGid */
if (j % L + SGid < L) /* Do not go out LG*/
exit_if_not_equal_vec<T, N>(acc2_down[j],
vec<T, N>((j + SGid) % L % sg_size),
"shuffle2_down");
exit_if_not_equal_vec(acc_down[j], vec<T, N>(j + SGid % sg_size),
"shuffle_down");
}
/* Value GID-SGID for all element except first SGID in SG*/
if (j % L % sg_size >= SGid) {
exit_if_not_equal_vec(acc_up[j], vec<T, N>(j - SGid), "shuffle_up");
exit_if_not_equal_vec(acc2_up[j], vec<T, N>(j - SGid), "shuffle2_up");
} else { /* SGLID for GID-SGid */
if (j % L - SGid + sg_size < L) /* Do not go out LG*/
exit_if_not_equal_vec(acc2_up[j], vec<T, N>(j - SGid + sg_size),
"shuffle2_up");
exit_if_not_equal_vec(acc_up[j], vec<T, N>(j - SGid % sg_size),
"shuffle_up");
}
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
exit_if_not_equal_vec(acc_xor[j],
Expand All @@ -133,23 +100,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
}
}

template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
try {
nd_range<1> NdRange(G, L);
buffer<T> buf2(G);
buffer<T> buf2_up(G);
buffer<T> buf2_down(G);
buffer<T> buf(G);
buffer<T> buf_up(G);
buffer<T> buf_down(G);
buffer<T> buf_xor(G);
buffer<size_t> sgsizebuf(1);
Queue.submit([&](handler &cgh) {
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
auto acc2_down =
buf2_down.template get_access<access::mode::read_write>(cgh);

auto acc = buf.template get_access<access::mode::read_write>(cgh);
auto acc_up = buf_up.template get_access<access::mode::read_write>(cgh);
auto acc_down =
Expand All @@ -163,14 +122,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
uint32_t sgid = SG.get_group_id().get(0);
if (wggid == 0)
sgsizeacc[0] = SG.get_max_local_range()[0];
/* 1 for odd subgroups and 2 for even*/
acc2[NdItem.get_global_id()] =
SG.shuffle<T>(1, 2, (sgid % 2) ? 1 : SG.get_max_local_range()[0]);
/* GID-SGID */
acc2_up[NdItem.get_global_id()] = SG.shuffle_up<T>(wggid, wggid, sgid);
/* GID-SGID or SGLID if GID+SGID > SGsize*/
acc2_down[NdItem.get_global_id()] =
SG.shuffle_down<T>(wggid, SG.get_local_id().get(0), sgid);

/*GID of middle element in every subgroup*/
acc[NdItem.get_global_id()] =
Expand All @@ -187,9 +138,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
auto acc = buf.template get_access<access::mode::read_write>();
auto acc_up = buf_up.template get_access<access::mode::read_write>();
auto acc_down = buf_down.template get_access<access::mode::read_write>();
auto acc2 = buf2.template get_access<access::mode::read_write>();
auto acc2_up = buf2_up.template get_access<access::mode::read_write>();
auto acc2_down = buf2_down.template get_access<access::mode::read_write>();
auto acc_xor = buf_xor.template get_access<access::mode::read_write>();
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();

Expand All @@ -212,24 +160,13 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
/*GID of middle element in every subgroup*/
exit_if_not_equal<T>(acc[j], j / L * L + SGid * sg_size + sg_size / 2,
"shuffle");
/* 1 for odd subgroups and 2 for even*/
exit_if_not_equal<T>(acc2[j], (SGid % 2) ? 1 : 2, "shuffle2");
/* Value GID+SGID for all element except last SGID in SG*/
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
exit_if_not_equal<T>(acc_down[j], j + SGid, "shuffle_down");
exit_if_not_equal<T>(acc2_down[j], j + SGid, "shuffle2_down");
} else { /* SGLID for GID+SGid */
if (j % L + SGid < L) /* Do not go out LG*/
exit_if_not_equal<T>(acc2_down[j], (j + SGid) % L % sg_size,
"shuffle2_down");
}
/* Value GID-SGID for all element except first SGID in SG*/
if (j % L % sg_size >= SGid) {
exit_if_not_equal<T>(acc_up[j], j - SGid, "shuffle_up");
exit_if_not_equal<T>(acc2_up[j], j - SGid, "shuffle2_up");
} else { /* SGLID for GID-SGid */
if (j % L - SGid + sg_size < L) /* Do not go out LG*/
exit_if_not_equal<T>(acc2_up[j], j - SGid + sg_size, "shuffle2_up");
}
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
exit_if_not_equal<T>(acc_xor[j], SGBeginGid + (SGLid ^ (SGid % sg_size)),
Expand Down