Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 047fa38

Browse files
authored
Add 'feature' tests to InvokeSimd (#1443)
1 parent 9113955 commit 047fa38

16 files changed

+1248
-5
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../SPMD_invoke_ESIMD_external.cpp -o %t.out
9+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
10+
//
11+
// VISALTO enable run
12+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
13+
14+
/*
15+
* This tests is the same as InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp,
16+
* but compiles without optional subgroup attribute specified and intended to
17+
* check that compiler is able to choose subgroup size correctly.
18+
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../invoke_simd_struct.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/feature/invoke_simd_struct.cpp, but
14+
* compiles without optional subgroup attribute specified and intended to check
15+
* that compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable after simd_mask supported
6+
// XFAIL: gpu
7+
//
8+
// Check that full compilation works:
9+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt.cpp -o %t.out
10+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
//
12+
// VISALTO enable run
13+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
14+
15+
/*
16+
* This tests is the same as InvokeSimd/feature/popcnt.cpp, but compiles without
17+
* optional subgroup attribute specified and intended to check that compiler is
18+
* able to choose subgroup size correctly.
19+
*/
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
// TODO: enable on Windows once driver is ready
22
// REQUIRES: gpu && linux
33
// UNSUPPORTED: cuda || hip
4-
5-
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt_emu.cpp -o %t.out
67
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
78
//
89
// VISALTO enable run
910
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
1011

1112
/*
12-
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles
13+
* This tests is the same as InvokeSimd/feature/popcnt_emu.cpp, but compiles
1314
* without optional subgroup attribute specified and intended to check that
1415
* compiler is able to choose subgroup size correctly.
1516
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../scale.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/feature/scale.cpp, but compiles without
14+
* optional subgroup attribute specified and intended to check that compiler is
15+
* able to choose subgroup size correctly.
16+
*/
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../void_retval.cpp -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
//
9+
// VISALTO enable run
10+
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
11+
12+
/*
13+
* This tests is the same as InvokeSimd/feature/void_retval.cpp, but compiles
14+
* without optional subgroup attribute specified and intended to check that
15+
* compiler is able to choose subgroup size correctly.
16+
*/
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// TODO: enable when Jira ticket resolved
6+
// XFAIL: gpu
7+
//
8+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
9+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
10+
//
11+
// VISALTO enable run
12+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
13+
14+
/*
15+
* This test checks the case of calling the same external function from the SPMD
16+
* and ESIMD kernels.
17+
*/
18+
19+
#include <sycl/ext/intel/esimd.hpp>
20+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
21+
#include <sycl/ext/oneapi/experimental/uniform.hpp>
22+
#include <sycl/sycl.hpp>
23+
24+
#include <functional>
25+
#include <iostream>
26+
#include <type_traits>
27+
28+
/* Subgroup size attribute is optional
29+
* In case it is absent compiler decides what subgroup size to use
30+
*/
31+
#ifdef IMPL_SUBGROUP
32+
#define SUBGROUP_ATTR
33+
#else
34+
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
35+
#endif
36+
37+
using namespace sycl::ext::oneapi::experimental;
38+
namespace esimd = sycl::ext::intel::esimd;
39+
constexpr int VL = 16;
40+
41+
esimd::simd<float, VL> ESIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
42+
esimd::simd<float, VL> res;
43+
res.copy_from(A + i);
44+
return res;
45+
}
46+
47+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
48+
simd<float, VL> __regcall SIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
49+
esimd::simd<float, VL> res = ESIMD_CALLEE(A, i);
50+
return res;
51+
}
52+
53+
using namespace sycl;
54+
55+
int main() {
56+
constexpr unsigned Size = 1024;
57+
constexpr unsigned GroupSize = 4 * VL;
58+
59+
auto q = queue{gpu_selector_v};
60+
auto dev = q.get_device();
61+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
62+
<< "\n";
63+
64+
float *A = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
65+
float *B = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
66+
float *C = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
67+
68+
for (unsigned i = 0; i < Size; ++i) {
69+
A[i] = i;
70+
B[i] = C[i] = -1;
71+
}
72+
73+
try {
74+
sycl::range<1> GlobalRange{Size};
75+
// Number of workitems in each workgroup.
76+
sycl::range<1> LocalRange{GroupSize};
77+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
78+
79+
auto e = q.submit([&](handler &cgh) {
80+
cgh.parallel_for<class TestInvokeSimd>(
81+
Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
82+
sub_group sg = ndi.get_sub_group();
83+
group<1> g = ndi.get_group();
84+
uint32_t i = sg.get_group_linear_id() * VL +
85+
g.get_group_linear_id() * GroupSize;
86+
uint32_t wi_id = i + sg.get_local_id();
87+
88+
float res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, uniform{i});
89+
B[wi_id] = res;
90+
});
91+
});
92+
e.wait();
93+
} catch (sycl::exception const &e) {
94+
sycl::free(A, q);
95+
sycl::free(B, q);
96+
sycl::free(C, q);
97+
98+
std::cout << "SYCL exception caught: " << e.what() << '\n';
99+
return e.code().value();
100+
}
101+
102+
try {
103+
sycl::range<1> GlobalRange{Size};
104+
// Number of workitems in each workgroup.
105+
sycl::range<1> LocalRange{VL};
106+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
107+
108+
auto e = q.submit([&](handler &cgh) {
109+
cgh.parallel_for<class TestExternalCall>(
110+
Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
111+
uint32_t i = ndi.get_group(0) * VL;
112+
113+
esimd::simd<float, VL> res(SIMD_CALLEE(B, i));
114+
res.copy_to(C + i);
115+
});
116+
});
117+
e.wait();
118+
} catch (sycl::exception const &e) {
119+
sycl::free(A, q);
120+
sycl::free(B, q);
121+
sycl::free(C, q);
122+
123+
std::cout << "SYCL exception caught: " << e.what() << '\n';
124+
return e.code().value();
125+
}
126+
127+
int err_cnt = 0;
128+
129+
for (unsigned i = 0; i < Size; ++i) {
130+
if (A[i] != B[i] || B[i] != C[i]) {
131+
if (++err_cnt < 10) {
132+
std::cout << "failed at index " << i << ", " << A[i] << " != " << B[i]
133+
<< " != " << C[i] << "\n";
134+
}
135+
}
136+
}
137+
138+
sycl::free(A, q);
139+
sycl::free(B, q);
140+
sycl::free(C, q);
141+
142+
if (err_cnt > 0) {
143+
std::cout << " pass rate: "
144+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
145+
<< (Size - err_cnt) << "/" << Size << ")\n";
146+
}
147+
148+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
149+
return err_cnt;
150+
}

0 commit comments

Comments
 (0)