Skip to content

Commit ed722df

Browse files
authored
[SYCL][NFC][SUBGROUPS] Separate half/double type tests from full type tests (#2020)
1. Separate half/double type tests from full type tests. 2. Fix error caused by half type shuffle.
1 parent be4e641 commit ed722df

16 files changed

+733
-515
lines changed

sycl/test/sub_group/broadcast.cpp

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
// CUDA compilation and runtime do not yet support sub-groups.
33

44
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D SG_GPU %s -o %t_gpu.out
65
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
76
// RUN: %CPU_RUN_PLACEHOLDER %t.out
8-
// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
7+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
98
// RUN: %ACC_RUN_PLACEHOLDER %t.out
109

1110
//==--------- broadcast.cpp - SYCL sub_group broadcast test ----*- C++ -*---==//
@@ -16,52 +15,8 @@
1615
//
1716
//===----------------------------------------------------------------------===//
1817

19-
#include "helper.hpp"
20-
#include <CL/sycl.hpp>
21-
template <typename T>
22-
class sycl_subgr;
23-
using namespace cl::sycl;
24-
template <typename T>
25-
void check(queue &Queue) {
26-
const int G = 240, L = 60;
27-
try {
28-
nd_range<1> NdRange(G, L);
29-
buffer<T> syclbuf(G);
30-
buffer<size_t> sgsizebuf(1);
31-
Queue.submit([&](handler &cgh) {
32-
auto syclacc = syclbuf.template get_access<access::mode::read_write>(cgh);
33-
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
34-
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
35-
intel::sub_group SG = NdItem.get_sub_group();
36-
/*Broadcast GID of element with SGLID == SGID */
37-
syclacc[NdItem.get_global_id()] =
38-
broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
39-
if (NdItem.get_global_id(0) == 0)
40-
sgsizeacc[0] = SG.get_max_local_range()[0];
41-
});
42-
});
43-
auto syclacc = syclbuf.template get_access<access::mode::read_write>();
44-
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
45-
size_t sg_size = sgsizeacc[0];
46-
if (sg_size == 0)
47-
sg_size = L;
48-
int WGid = -1, SGid = 0;
49-
for (int j = 0; j < G; j++) {
50-
if (j % L % sg_size == 0) {
51-
SGid++;
52-
}
53-
if (j % L == 0) {
54-
WGid++;
55-
SGid = 0;
56-
}
57-
exit_if_not_equal<T>(syclacc[j], L * WGid + SGid + SGid * sg_size,
58-
"broadcasted value");
59-
}
60-
} catch (exception e) {
61-
std::cout << "SYCL exception caught: " << e.what();
62-
exit(1);
63-
}
64-
}
18+
#include "broadcast.hpp"
19+
6520
int main() {
6621
queue Queue;
6722
if (!core_sg_supported(Queue.get_device())) {
@@ -73,15 +28,6 @@ int main() {
7328
check<long>(Queue);
7429
check<unsigned long>(Queue);
7530
check<float>(Queue);
76-
// broadcast half type is not supported in OCL CPU RT
77-
#ifdef SG_GPU
78-
if (Queue.get_device().has_extension("cl_khr_fp16")) {
79-
check<cl::sycl::half>(Queue);
80-
}
81-
#endif
82-
if (Queue.get_device().has_extension("cl_khr_fp64")) {
83-
check<double>(Queue);
84-
}
8531
std::cout << "Test passed." << std::endl;
8632
return 0;
8733
}

sycl/test/sub_group/broadcast.hpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//==--------- broadcast.hpp - SYCL sub_group broadcast test ----*- C++ -*---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "helper.hpp"
10+
#include <CL/sycl.hpp>
11+
template <typename T>
12+
class sycl_subgr;
13+
using namespace cl::sycl;
14+
template <typename T>
15+
void check(queue &Queue) {
16+
const int G = 240, L = 60;
17+
try {
18+
nd_range<1> NdRange(G, L);
19+
buffer<T> syclbuf(G);
20+
buffer<size_t> sgsizebuf(1);
21+
Queue.submit([&](handler &cgh) {
22+
auto syclacc = syclbuf.template get_access<access::mode::read_write>(cgh);
23+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
24+
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
25+
intel::sub_group SG = NdItem.get_sub_group();
26+
/*Broadcast GID of element with SGLID == SGID */
27+
syclacc[NdItem.get_global_id()] =
28+
broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
29+
if (NdItem.get_global_id(0) == 0)
30+
sgsizeacc[0] = SG.get_max_local_range()[0];
31+
});
32+
});
33+
auto syclacc = syclbuf.template get_access<access::mode::read_write>();
34+
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
35+
size_t sg_size = sgsizeacc[0];
36+
if (sg_size == 0)
37+
sg_size = L;
38+
int WGid = -1, SGid = 0;
39+
for (int j = 0; j < G; j++) {
40+
if (j % L % sg_size == 0) {
41+
SGid++;
42+
}
43+
if (j % L == 0) {
44+
WGid++;
45+
SGid = 0;
46+
}
47+
exit_if_not_equal<T>(syclacc[j], L * WGid + SGid + SGid * sg_size,
48+
"broadcasted value");
49+
}
50+
} catch (exception e) {
51+
std::cout << "SYCL exception caught: " << e.what();
52+
exit(1);
53+
}
54+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// UNSUPPORTED: cuda
2+
// CUDA compilation and runtime do not yet support sub-groups.
3+
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
6+
7+
//==--------- broadcast_fp16.cpp - SYCL sub_group broadcast test ----*- C++ -*---==//
8+
//
9+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10+
// See https://llvm.org/LICENSE.txt for license information.
11+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12+
//
13+
//===---------------------------------------------------------------------------===//
14+
15+
#include "broadcast.hpp"
16+
17+
int main() {
18+
queue Queue;
19+
if (!core_sg_supported(Queue.get_device())) {
20+
std::cout << "Skipping test\n";
21+
return 0;
22+
}
23+
check<cl::sycl::half>(Queue);
24+
return 0;
25+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// UNSUPPORTED: cuda
2+
// CUDA compilation and runtime do not yet support sub-groups.
3+
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
6+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
7+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
8+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
9+
10+
//==--------- broadcast_fp64.cpp - SYCL sub_group broadcast test ----*- C++ -*---==//
11+
//
12+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
13+
// See https://llvm.org/LICENSE.txt for license information.
14+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
15+
//
16+
//===---------------------------------------------------------------------------===//
17+
18+
#include "broadcast.hpp"
19+
20+
int main() {
21+
queue Queue;
22+
if (!core_sg_supported(Queue.get_device())) {
23+
std::cout << "Skipping test\n";
24+
return 0;
25+
}
26+
check<double>(Queue);
27+
std::cout << "Test passed." << std::endl;
28+
return 0;
29+
}

sycl/test/sub_group/reduce.cpp

Lines changed: 3 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
// UNSUPPORTED: cuda
22
// CUDA compilation and runtime do not yet support sub-groups.
33
//
4-
// RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
5-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
65
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
76
// RUN: %CPU_RUN_PLACEHOLDER %t.out
8-
// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
7+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
98
// RUN: %ACC_RUN_PLACEHOLDER %t.out
10-
119
//==--------------- reduce.cpp - SYCL sub_group reduce test ----*- C++ -*---==//
1210
//
1311
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -16,115 +14,19 @@
1614
//
1715
//===----------------------------------------------------------------------===//
1816

19-
#include "helper.hpp"
20-
#include <CL/sycl.hpp>
21-
22-
template <typename T, class BinaryOperation>
23-
class sycl_subgr;
24-
25-
using namespace cl::sycl;
26-
27-
template <typename T, class BinaryOperation>
28-
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
29-
size_t G = 240, size_t L = 60) {
30-
try {
31-
nd_range<1> NdRange(G, L);
32-
buffer<T> buf(G);
33-
buffer<size_t> sgsizebuf(1);
34-
Queue.submit([&](handler &cgh) {
35-
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
36-
auto acc = buf.template get_access<access::mode::read_write>(cgh);
37-
cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
38-
NdRange, [=](nd_item<1> NdItem) {
39-
intel::sub_group sg = NdItem.get_sub_group();
40-
if (skip_init) {
41-
acc[NdItem.get_global_id(0)] =
42-
reduce(sg, T(NdItem.get_global_id(0)), op);
43-
} else {
44-
acc[NdItem.get_global_id(0)] =
45-
reduce(sg, T(NdItem.get_global_id(0)), init, op);
46-
}
47-
if (NdItem.get_global_id(0) == 0)
48-
sgsizeacc[0] = sg.get_max_local_range()[0];
49-
});
50-
});
51-
auto acc = buf.template get_access<access::mode::read_write>();
52-
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
53-
size_t sg_size = sgsizeacc[0];
54-
int WGid = -1, SGid = 0;
55-
T result = init;
56-
for (int j = 0; j < G; j++) {
57-
if (j % L % sg_size == 0) {
58-
SGid++;
59-
result = init;
60-
for (int i = j; (i % L && i % L % sg_size) || (i == j); i++) {
61-
result = op(result, T(i));
62-
}
63-
}
64-
if (j % L == 0) {
65-
WGid++;
66-
SGid = 0;
67-
}
68-
std::string name =
69-
std::string("reduce_") + typeid(BinaryOperation).name();
70-
exit_if_not_equal<T>(acc[j], result, name.c_str());
71-
}
72-
} catch (exception e) {
73-
std::cout << "SYCL exception caught: " << e.what();
74-
exit(1);
75-
}
76-
}
77-
78-
template <typename T>
79-
void check(queue &Queue, size_t G = 240, size_t L = 60) {
80-
// limit data range for half to avoid rounding issues
81-
if (std::is_same<T, cl::sycl::half>::value) {
82-
G = 64;
83-
L = 32;
84-
}
85-
86-
check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
87-
check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
88-
89-
check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
90-
check_op<T>(Queue, T(G), intel::minimum<T>(), true, G, L);
91-
92-
check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
93-
check_op<T>(Queue, T(0), intel::maximum<T>(), true, G, L);
94-
95-
#if __cplusplus >= 201402L
96-
check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
97-
check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
98-
99-
check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
100-
check_op<T>(Queue, T(G), intel::minimum<>(), true, G, L);
101-
102-
check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
103-
check_op<T>(Queue, T(0), intel::maximum<>(), true, G, L);
104-
#endif
105-
}
17+
#include "reduce.hpp"
10618

10719
int main() {
10820
queue Queue;
10921
if (!core_sg_supported(Queue.get_device())) {
11022
std::cout << "Skipping test\n";
11123
return 0;
11224
}
113-
11425
check<int>(Queue);
11526
check<unsigned int>(Queue);
11627
check<long>(Queue);
11728
check<unsigned long>(Queue);
11829
check<float>(Queue);
119-
// reduce half type is not supported in OCL CPU RT
120-
#ifdef SG_GPU
121-
if (Queue.get_device().has_extension("cl_khr_fp16")) {
122-
check<cl::sycl::half>(Queue);
123-
}
124-
#endif
125-
if (Queue.get_device().has_extension("cl_khr_fp64")) {
126-
check<double>(Queue);
127-
}
12830
std::cout << "Test passed." << std::endl;
12931
return 0;
13032
}

0 commit comments

Comments
 (0)