Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit d69c64d

Browse files
authored
Add 'spec' tests to SYCL/InvokeSimd (#1360)
1 parent 10a2d57 commit d69c64d

38 files changed

+3011
-0
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
// Tests invoke_simd support in the compiler/headers
13+
14+
/* Test case specification:
15+
* -----------------------
16+
* Test and report errors if invoked ESIMD function calls SPMD function.
17+
*
18+
* Test case description:
19+
* ---------------------
20+
* This is an additional test case to increase test coverage. It tests a
21+
* happy-path. ESIMD_CALLEE_doVadd() calls doVadd(), which is an entirely
22+
* unmarked function (not tagged with SYCL_EXTERNAL nor SYCL_ESIMD_FUNCTION).
23+
* Such a function "is treated as ESIMD if called froma within ESIMD code"
24+
* and is treated as SPMD if called from within SPMD (SYCL) code.
25+
*
26+
* Currently, this test case passes because no rules are violated; the unmarked
27+
* function doVadd() is treated as ESIMD when called from within ESIMD code
28+
* (ESIMD_CALLEE_doVadd()), and as SPMD when called from within SPMD code.
29+
* Therefore, we never go from ESIMD to SPMD, or from SPMD to ESIMD in this test
30+
* case.
31+
*
32+
* This test also runs with all types of VISA link time optimizations enabled.
33+
*/
34+
35+
#include <sycl/ext/intel/esimd.hpp>
36+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
37+
#include <sycl/sycl.hpp>
38+
39+
#include <functional>
40+
#include <iostream>
41+
#include <type_traits>
42+
43+
/* Subgroup size attribute is optional
44+
* In case it is absent compiler decides what subgroup size to use
45+
*/
46+
#ifdef IMPL_SUBGROUP
47+
#define SUBGROUP_ATTR
48+
#else
49+
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
50+
#endif
51+
52+
using namespace sycl;
53+
using namespace sycl::ext::oneapi::experimental;
54+
namespace esimd = sycl::ext::intel::esimd;
55+
56+
// 1024 / 16 = 64: There will be 64 iterations that process 16 elements each
57+
constexpr int Size = 1024;
58+
// constexpr int Size = 1024 * 128;
59+
constexpr int VL = 16;
60+
61+
/* This unmarked function will be treated the same as its calling context;
62+
* if it is called from within ESIMD code, it will be treated as ESIMD,
63+
* if it is called from within SPMD code, it will be treated as SPMD.
64+
*/
65+
// SYCL_EXTERNAL
66+
float doVadd(float a, float b) { return a + b; }
67+
68+
__attribute__((always_inline)) esimd::simd<float, VL>
69+
ESIMD_CALLEE_doVadd(esimd::simd<float, VL> va,
70+
esimd::simd<float, VL> vb) SYCL_ESIMD_FUNCTION {
71+
esimd::simd<float, VL> vc;
72+
for (int i = 0; i < VL; ++i) {
73+
float a = va[i];
74+
float b = vb[i];
75+
vc[i] = doVadd(a, b);
76+
}
77+
return vc;
78+
}
79+
80+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
81+
simd<float, VL> __regcall SIMD_CALLEE_doVadd(
82+
simd<float, VL> va, simd<float, VL> vb) SYCL_ESIMD_FUNCTION;
83+
84+
constexpr bool use_invoke_simd = true;
85+
86+
int main(void) {
87+
float *A = new float[Size];
88+
float *B = new float[Size];
89+
float *C = new float[Size];
90+
91+
for (unsigned i = 0; i < Size; ++i) {
92+
A[i] = B[i] = i;
93+
C[i] = 0.0f;
94+
}
95+
96+
try {
97+
buffer<float, 1> bufa(A, range<1>(Size));
98+
buffer<float, 1> bufb(B, range<1>(Size));
99+
buffer<float, 1> bufc(C, range<1>(Size));
100+
101+
// We need that many workgroups
102+
sycl::range<1> GlobalRange{Size};
103+
104+
// We need that many threads in each group
105+
sycl::range<1> LocalRange{VL};
106+
107+
auto q = queue{gpu_selector_v};
108+
auto dev = q.get_device();
109+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
110+
<< "\n";
111+
112+
auto e = q.submit([&](handler &cgh) {
113+
auto PA = bufa.get_access<access::mode::read>(cgh);
114+
auto PB = bufb.get_access<access::mode::read>(cgh);
115+
auto PC = bufc.get_access<access::mode::write>(cgh);
116+
117+
cgh.parallel_for<class Test>(
118+
nd_range<1>(GlobalRange, LocalRange),
119+
[=](nd_item<1> item) SUBGROUP_ATTR {
120+
sycl::group<1> g = item.get_group();
121+
sycl::sub_group sg = item.get_sub_group();
122+
123+
unsigned int offset = g.get_group_id() * g.get_local_range() +
124+
sg.get_group_id() * sg.get_max_local_range();
125+
float va = sg.load(PA.get_pointer() + offset);
126+
float vb = sg.load(PB.get_pointer() + offset);
127+
float vc;
128+
129+
if constexpr (use_invoke_simd) {
130+
vc = invoke_simd(sg, SIMD_CALLEE_doVadd, va, vb);
131+
} else {
132+
vc = doVadd(va, vb);
133+
}
134+
sg.store(PC.get_pointer() + offset, vc);
135+
});
136+
});
137+
e.wait();
138+
} catch (sycl::exception const &e) {
139+
delete[] A;
140+
delete[] B;
141+
delete[] C;
142+
143+
std::cout << "SYCL exception caught: " << e.what() << '\n';
144+
return e.code().value();
145+
}
146+
147+
int err_cnt = 0;
148+
149+
for (unsigned i = 0; i < Size; ++i) {
150+
if (A[i] + B[i] != C[i]) {
151+
if (++err_cnt < 10) {
152+
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
153+
<< " + " << B[i] << "\n";
154+
}
155+
}
156+
}
157+
if (err_cnt > 0) {
158+
std::cout << " pass rate: "
159+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
160+
<< (Size - err_cnt) << "/" << Size << ")\n";
161+
}
162+
163+
delete[] A;
164+
delete[] B;
165+
delete[] C;
166+
167+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
168+
return err_cnt > 0 ? 1 : 0;
169+
}
170+
171+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
172+
simd<float, VL> __regcall SIMD_CALLEE_doVadd(
173+
simd<float, VL> va, simd<float, VL> vb) SYCL_ESIMD_FUNCTION {
174+
esimd::simd<float, VL> res = ESIMD_CALLEE_doVadd(va, vb);
175+
return res;
176+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../ESIMD_to_unmarked_function.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/spec/ESIMD_to_unmarked_function.cpp,
14+
* but compiles without optional subgroup attribute specified and intended to
15+
* check that compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../function_overloads.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/function_overloads.cpp, but
17+
* compiles without optional subgroup attribute specified and intended to check
18+
* that compiler is able to choose subgroup size correctly.
19+
*/
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
5+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out
6+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
7+
//
8+
// VISALTO enable run
9+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
10+
11+
/*
12+
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles
13+
* without optional subgroup attribute specified and intended to check that
14+
* compiler is able to choose subgroup size correctly.
15+
*/
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../multiple_SPMD_to_multiple_ESIMD.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as
14+
* InvokeSimd/spec/multiple_SPMD_to_multiple_ESIMD.cpp, but compiles without
15+
* optional subgroup attribute specified and intended to check that compiler
16+
* is able to choose subgroup size correctly.
17+
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../multiple_SPMD_to_single_ESIMD.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/spec/multiple_SPMD_to_single_ESIMD.cpp,
14+
* but compiles without optional subgroup attribute specified and intended to
15+
* check that compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../nested_ESIMD_to_ESIMD.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/spec/nested_ESIMD_to_ESIMD.cpp, but
14+
* compiles without optional subgroup attribute specified and intended to check
15+
* that compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../nested_SPMD_to_ESIMD.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/spec/nested_SPMD_to_ESIMD.cpp, but
14+
* compiles without optional subgroup attribute specified and intended to check
15+
* that compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../simd_mask.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/simd_mask.cpp, but compiles without
17+
* optional subgroup attribute specified and intended to check that compiler is
18+
* able to choose subgroup size correctly.
19+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/tuple.cpp, but compiles without
17+
* optional subgroup attribute specified and intended to check that compiler is
18+
* able to choose subgroup size correctly.
19+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_return.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/tuple_return.cpp, but compiles
17+
* without optional subgroup attribute specified and intended to check that
18+
* compiler is able to choose subgroup size correctly.
19+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_vadd.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/tuple_vadd.cpp, but compiles
17+
* without optional subgroup attribute specified and intended to check that
18+
* compiler is able to choose subgroup size correctly.
19+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../uniform_retval.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/spec/uniform_retval.cpp, but compiles
17+
* without optional subgroup attribute specified and intended to check that
18+
* compiler is able to choose subgroup size correctly.
19+
*/

0 commit comments

Comments
 (0)