Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit d565b8a

Browse files
authored
[SYCL] Add test cases for multiplies,bit_or,bit_xor,bit_and subgroup … (#158)
* Move spir-v 1.3 test cases to separate files to avoid failure on CUDA * Fix the check for sub-groups availability. It returned false even when device had support for sub-groups * Add checks for cl_khr_fp16/fp64
1 parent 950acbc commit d565b8a

13 files changed

+292
-39
lines changed

SYCL/SubGroup/helper.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ void exit_if_not_equal_vec(vec<T, N> val, vec<T, N> ref, const char *name) {
154154
}
155155

156156
bool core_sg_supported(const device &Device) {
157-
return (Device.has_extension("cl_khr_subgroups") ||
158-
Device.get_info<info::device::version>().find(" 2.1") !=
159-
string_class::npos);
157+
if (Device.has_extension("cl_khr_subgroups"))
158+
return true;
159+
return Device.get_info<info::device::version>() >= "2.1";
160160
}

SYCL/SubGroup/reduce.hpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
8888
check_op<sycl_subgr<SpecializationKernelName, class KernelName_bPPlfvdGShi>,
8989
T>(Queue, T(0), ONEAPI::maximum<T>(), true, G, L);
9090

91-
#if __cplusplus >= 201402L
91+
// Transparent operator functors.
9292
check_op<sycl_subgr<SpecializationKernelName,
9393
class KernelName_fkOyLRYirfMnvBcnbRFy>,
9494
T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
@@ -107,5 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
107107
check_op<
108108
sycl_subgr<SpecializationKernelName, class KernelName_BaCGaWDMFeMFqvotbk>,
109109
T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
110-
#endif
110+
}
111+
112+
template <typename SpecializationKernelName, typename T>
113+
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
114+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
115+
Queue, T(G), ONEAPI::multiplies<T>(), false, G, L);
116+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
117+
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);
118+
119+
// Transparent operator functors.
120+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
121+
Queue, T(G), ONEAPI::multiplies<>(), false, G, L);
122+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
123+
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
124+
}
125+
126+
template <typename SpecializationKernelName, typename T>
127+
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
128+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
129+
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
130+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
131+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
132+
133+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
134+
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
135+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
136+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
137+
138+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
139+
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
140+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
141+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
142+
143+
// Transparent operator functors
144+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
145+
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
146+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
147+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
148+
149+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
150+
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
151+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
152+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
153+
154+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
155+
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
156+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
157+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
111158
}

SYCL/SubGroup/reduce_fp16.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3-
///==---------- reduce_fp16.cpp - SYCL sub_group reduce test ----*- C++ -*--==//
4-
//
5-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6-
// See https://llvm.org/LICENSE.txt for license information.
7-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8-
//
9-
//===----------------------------------------------------------------------===//
3+
4+
// This test verifies the correct work of the sub-group algorithm reduce().
105

116
#include "reduce.hpp"
127

138
int main() {
149
queue Queue;
15-
if (!core_sg_supported(Queue.get_device())) {
10+
if (!core_sg_supported(Queue.get_device()) ||
11+
!Queue.get_device().has_extension("cl_khr_fp16")) {
1612
std::cout << "Skipping test\n";
1713
return 0;
1814
}

SYCL/SubGroup/reduce_fp64.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,15 @@
77
// RUN: %CPU_RUN_PLACEHOLDER %t.out
88
// RUN: %GPU_RUN_PLACEHOLDER %t.out
99
// RUN: %ACC_RUN_PLACEHOLDER %t.out
10-
///==---------- reduce_fp64.cpp - SYCL sub_group reduce test ----*- C++ -*--==//
11-
//
12-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
13-
// See https://llvm.org/LICENSE.txt for license information.
14-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
15-
//
16-
//===----------------------------------------------------------------------===//
10+
11+
// This test verifies the correct work of the sub-group algorithm reduce().
1712

1813
#include "reduce.hpp"
1914

2015
int main() {
2116
queue Queue;
22-
if (!core_sg_supported(Queue.get_device())) {
17+
if (!core_sg_supported(Queue.get_device()) ||
18+
!Queue.get_device().has_extension("cl_khr_fp64")) {
2319
std::cout << "Skipping test\n";
2420
return 0;
2521
}

SYCL/SubGroup/reduce_spirv13.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
14+
// used with the operations MUL, bitwise OR, XOR, and AND.
15+
16+
#include "reduce.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device())) {
21+
std::cout << "Skipping test\n";
22+
return 0;
23+
}
24+
25+
check_mul<class MulA, int>(Queue);
26+
check_mul<class MulB, unsigned int>(Queue);
27+
check_mul<class MulC, long>(Queue);
28+
check_mul<class MulD, unsigned long>(Queue);
29+
check_mul<class MulE, float>(Queue);
30+
31+
check_bit_ops<class A, int>(Queue);
32+
check_bit_ops<class B, unsigned int>(Queue);
33+
check_bit_ops<class C, unsigned>(Queue);
34+
check_bit_ops<class D, long>(Queue);
35+
check_bit_ops<class E, unsigned long>(Queue);
36+
check_bit_ops<class F, long long>(Queue);
37+
check_bit_ops<class G, unsigned long long>(Queue);
38+
return 0;
39+
}

SYCL/SubGroup/reduce_spirv13_fp16.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// UNSUPPORTED: cuda
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
6+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
7+
// used with MUL operation.
8+
9+
#include "reduce.hpp"
10+
11+
int main() {
12+
queue Queue;
13+
if (!core_sg_supported(Queue.get_device()) ||
14+
!Queue.get_device().has_extension("cl_khr_fp16")) {
15+
std::cout << "Skipping test\n";
16+
return 0;
17+
}
18+
check_mul<class MulHalf, cl::sycl::half>(Queue);
19+
std::cout << "Test passed." << std::endl;
20+
return 0;
21+
}

SYCL/SubGroup/reduce_spirv13_fp64.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
14+
// used with MUL operation.
15+
16+
#include "reduce.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device()) ||
21+
!Queue.get_device().has_extension("cl_khr_fp64")) {
22+
std::cout << "Skipping test\n";
23+
return 0;
24+
}
25+
check_mul<class MulDouble, double>(Queue);
26+
std::cout << "Test passed." << std::endl;
27+
return 0;
28+
}

SYCL/SubGroup/scan.hpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
115115
Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(), true, G, L);
116116
}
117117

118-
#if __cplusplus >= 201402L
118+
// Transparent operator functors.
119119
check_op<sycl_subgr<SpecializationKernelName, class KernelName_TPWS>, T>(
120120
Queue, T(L), ONEAPI::plus<>(), false, G, L);
121121
check_op<sycl_subgr<SpecializationKernelName, class KernelName_hWZv>, T>(
@@ -150,5 +150,51 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
150150
T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true, G,
151151
L);
152152
}
153-
#endif
153+
}
154+
155+
template <typename SpecializationKernelName, typename T>
156+
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
157+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
158+
Queue, T(L), ONEAPI::multiplies<T>(), false, G, L);
159+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
160+
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
161+
162+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
163+
Queue, T(L), ONEAPI::multiplies<T>(), false, G, L);
164+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
165+
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
166+
}
167+
168+
template <typename SpecializationKernelName, typename T>
169+
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
170+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
171+
Queue, T(L), ONEAPI::bit_or<T>(), false, G, L);
172+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
173+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
174+
175+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
176+
Queue, T(L), ONEAPI::bit_xor<T>(), false, G, L);
177+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
178+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
179+
180+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
181+
Queue, T(L), ONEAPI::bit_and<T>(), false, G, L);
182+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
183+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
184+
185+
// Transparent operator functors.
186+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
187+
Queue, T(L), ONEAPI::bit_or<>(), false, G, L);
188+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
189+
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);
190+
191+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
192+
Queue, T(L), ONEAPI::bit_xor<>(), false, G, L);
193+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
194+
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);
195+
196+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
197+
Queue, T(L), ONEAPI::bit_and<>(), false, G, L);
198+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
199+
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
154200
}

SYCL/SubGroup/scan_fp16.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,15 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %GPU_RUN_PLACEHOLDER %t.out
33

4-
//==---------- scan_fp16.cpp - SYCL sub_group scan test --------*- C++ -*---==//
5-
//
6-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7-
// See https://llvm.org/LICENSE.txt for license information.
8-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9-
//
10-
//===----------------------------------------------------------------------===//
4+
// This test verifies the correct work of the sub-group algorithms
5+
// exclusive_scan() and inclusive_scan().
116

127
#include "scan.hpp"
138

149
int main() {
1510
queue Queue;
16-
if (!core_sg_supported(Queue.get_device())) {
11+
if (!core_sg_supported(Queue.get_device()) ||
12+
!Queue.get_device().has_extension("cl_khr_fp16")) {
1713
std::cout << "Skipping test\n";
1814
return 0;
1915
}

SYCL/SubGroup/scan_fp64.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,15 @@
88
// RUN: %GPU_RUN_PLACEHOLDER %t.out
99
// RUN: %ACC_RUN_PLACEHOLDER %t.out
1010

11-
//==---------- scan_fp64.cpp - SYCL sub_group scan test --------*- C++ -*---==//
12-
//
13-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
14-
// See https://llvm.org/LICENSE.txt for license information.
15-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
16-
//
17-
//===----------------------------------------------------------------------===//
11+
// This test verifies the correct work of the sub-group algorithms
12+
// exclusive_scan() and inclusive_scan().
1813

1914
#include "scan.hpp"
2015

2116
int main() {
2217
queue Queue;
23-
if (!core_sg_supported(Queue.get_device())) {
18+
if (!core_sg_supported(Queue.get_device()) ||
19+
!Queue.get_device().has_extension("cl_khr_fp64")) {
2420
std::cout << "Skipping test\n";
2521
return 0;
2622
}

SYCL/SubGroup/scan_spirv13.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and
14+
// inclusive_scan() algorithms used with the operations MUL, bitwise OR, XOR, and AND.
15+
16+
#include "scan.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device())) {
21+
std::cout << "Skipping test\n";
22+
return 0;
23+
}
24+
check_mul<class MulA, int>(Queue);
25+
check_mul<class MulB, unsigned int>(Queue);
26+
check_mul<class MulC, long>(Queue);
27+
check_mul<class MulD, unsigned long>(Queue);
28+
check_mul<class MulE, float>(Queue);
29+
30+
check_bit_ops<class A, int>(Queue);
31+
check_bit_ops<class B, unsigned int>(Queue);
32+
check_bit_ops<class C, unsigned>(Queue);
33+
check_bit_ops<class D, long>(Queue);
34+
check_bit_ops<class E, unsigned long>(Queue);
35+
check_bit_ops<class F, long long>(Queue);
36+
check_bit_ops<class G, unsigned long long>(Queue);
37+
std::cout << "Test passed." << std::endl;
38+
return 0;
39+
}

SYCL/SubGroup/scan_spirv13_fp16.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// UNSUPPORTED: cuda
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
6+
// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and
7+
// inclusive_scan() algorithms used with the MUL operation.
8+
9+
#include "scan.hpp"
10+
11+
int main() {
12+
queue Queue;
13+
if (!core_sg_supported(Queue.get_device()) ||
14+
!Queue.get_device().has_extension("cl_khr_fp16")) {
15+
std::cout << "Skipping test\n";
16+
return 0;
17+
}
18+
check_mul<class MulHalf, cl::sycl::half>(Queue);
19+
std::cout << "Test passed." << std::endl;
20+
return 0;
21+
}

0 commit comments

Comments
 (0)