Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit c3f7aeb

Browse files
committed
Additional fixes for CUDA: move spir-v 1.3 test cases to separate files
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent c938a3d commit c3f7aeb

10 files changed

+247
-135
lines changed

SYCL/SubGroup/reduce.hpp

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
107107
check_op<
108108
sycl_subgr<SpecializationKernelName, class KernelName_BaCGaWDMFeMFqvotbk>,
109109
T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
110+
}
110111

111-
// Use small sub-groups to avoid overflow effects for int multiply operations
112-
// and avoid rounding issues for FP multiply.
113-
L = 4;
112+
template <typename SpecializationKernelName, typename T>
113+
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
114114
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
115115
Queue, T(G), ONEAPI::multiplies<T>(), false, G, L);
116116
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
117117
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);
118118

119+
// Transparent operator functors.
119120
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
120121
Queue, T(G), ONEAPI::multiplies<>(), false, G, L);
121122
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
122123
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
123124
}
125+
126+
template <typename SpecializationKernelName, typename T>
127+
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
128+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
129+
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
130+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
131+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
132+
133+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
134+
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
135+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
136+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
137+
138+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
139+
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
140+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
141+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
142+
143+
// Transparent operator functors
144+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
145+
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
146+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
147+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
148+
149+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
150+
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
151+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
152+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
153+
154+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
155+
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
156+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
157+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
158+
}

SYCL/SubGroup/reduce_bit_ops.cpp

Lines changed: 0 additions & 64 deletions
This file was deleted.

SYCL/SubGroup/reduce_spirv13.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
14+
// used with the operation MUL, bitwise OR, XOR, AND.
15+
16+
#include "reduce.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device())) {
21+
std::cout << "Skipping test\n";
22+
return 0;
23+
}
24+
25+
check_mul<class MulA, int>(Queue);
26+
check_mul<class MulB, unsigned int>(Queue);
27+
check_mul<class MulC, long>(Queue);
28+
check_mul<class MulD, unsigned long>(Queue);
29+
check_mul<class MulE, float>(Queue);
30+
31+
check_bit_ops<class A, int>(Queue);
32+
check_bit_ops<class B, unsigned int>(Queue);
33+
check_bit_ops<class C, unsigned>(Queue);
34+
check_bit_ops<class D, long>(Queue);
35+
check_bit_ops<class E, unsigned long>(Queue);
36+
check_bit_ops<class F, long long>(Queue);
37+
check_bit_ops<class G, unsigned long long>(Queue);
38+
return 0;
39+
}

SYCL/SubGroup/reduce_spirv13_fp16.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// UNSUPPORTED: cuda
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
6+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
7+
// used with MUL operation.
8+
9+
#include "reduce.hpp"
10+
11+
int main() {
12+
queue Queue;
13+
if (!core_sg_supported(Queue.get_device())) {
14+
std::cout << "Skipping test\n";
15+
return 0;
16+
}
17+
check_mul<class MulHalf, cl::sycl::half>(Queue);
18+
std::cout << "Test passed." << std::endl;
19+
return 0;
20+
}

SYCL/SubGroup/reduce_spirv13_fp64.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 reduce algorithm
14+
// used with MUL operation.
15+
16+
#include "reduce.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device())) {
21+
std::cout << "Skipping test\n";
22+
return 0;
23+
}
24+
check_mul<class MulDouble, double>(Queue);
25+
std::cout << "Test passed." << std::endl;
26+
return 0;
27+
}

SYCL/SubGroup/scan.hpp

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
150150
T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true, G,
151151
L);
152152
}
153+
}
153154

154-
// Use small sub-groups to avoid overflow effects for int multiply operations
155-
// and avoid rounding issues for FP multiply.
156-
L = 4;
155+
template <typename SpecializationKernelName, typename T>
156+
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
157157
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
158158
Queue, T(L), ONEAPI::multiplies<T>(), false, G, L);
159159
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
@@ -164,3 +164,37 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
164164
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
165165
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
166166
}
167+
168+
template <typename SpecializationKernelName, typename T>
169+
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
170+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
171+
Queue, T(L), ONEAPI::bit_or<T>(), false, G, L);
172+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
173+
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);
174+
175+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
176+
Queue, T(L), ONEAPI::bit_xor<T>(), false, G, L);
177+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
178+
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);
179+
180+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
181+
Queue, T(L), ONEAPI::bit_and<T>(), false, G, L);
182+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
183+
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);
184+
185+
// Transparent operator functors.
186+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
187+
Queue, T(L), ONEAPI::bit_or<>(), false, G, L);
188+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
189+
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);
190+
191+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
192+
Queue, T(L), ONEAPI::bit_xor<>(), false, G, L);
193+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
194+
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);
195+
196+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
197+
Queue, T(L), ONEAPI::bit_and<>(), false, G, L);
198+
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
199+
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
200+
}

SYCL/SubGroup/scan_bit_ops.cpp

Lines changed: 0 additions & 65 deletions
This file was deleted.

SYCL/SubGroup/scan_spirv13.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// UNSUPPORTED: cpu
2+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
3+
// runtime for every supported ISA
4+
5+
// UNSUPPORTED: cuda
6+
7+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
8+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
11+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
12+
13+
// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and
14+
// inclusive_scan() algoriths used with the operation MUL, bitwise OR, XOR, AND.
15+
16+
#include "scan.hpp"
17+
18+
int main() {
19+
queue Queue;
20+
if (!core_sg_supported(Queue.get_device())) {
21+
std::cout << "Skipping test\n";
22+
return 0;
23+
}
24+
check_mul<class MulA, int>(Queue);
25+
check_mul<class MulB, unsigned int>(Queue);
26+
check_mul<class MulC, long>(Queue);
27+
check_mul<class MulD, unsigned long>(Queue);
28+
check_mul<class MulE, float>(Queue);
29+
30+
check_bit_ops<class A, int>(Queue);
31+
check_bit_ops<class B, unsigned int>(Queue);
32+
check_bit_ops<class C, unsigned>(Queue);
33+
check_bit_ops<class D, long>(Queue);
34+
check_bit_ops<class E, unsigned long>(Queue);
35+
check_bit_ops<class F, long long>(Queue);
36+
check_bit_ops<class G, unsigned long long>(Queue);
37+
std::cout << "Test passed." << std::endl;
38+
return 0;
39+
}

SYCL/SubGroup/scan_spirv13_fp16.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// UNSUPPORTED: cuda
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
6+
// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and
7+
// inclusive_scan() algoriths used with the MUL operation.
8+
9+
#include "scan.hpp"
10+
11+
int main() {
12+
queue Queue;
13+
if (!core_sg_supported(Queue.get_device())) {
14+
std::cout << "Skipping test\n";
15+
return 0;
16+
}
17+
check_mul<class MulHalf, cl::sycl::half>(Queue);
18+
std::cout << "Test passed." << std::endl;
19+
return 0;
20+
}

0 commit comments

Comments
 (0)