Skip to content

Commit f252c1c

Browse files
sommerlukas authored and bb-sycl committed
[SYCL][Fusion] Test if kernel fusion API compiles (intel#1404)
Two simple tests to check that code using the kernel fusion extension API compiles correctly. The tests do not yet execute the compiled application, because the necessary functionality will only be added to the implementation in a later PR.
Spec: intel/llvm#7098
Implementation: intel/llvm#7416
Signed-off-by: Lukas Sommer <[email protected]>
1 parent aefb8bd commit f252c1c

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

SYCL/KernelFusion/cancel_fusion.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// UNSUPPORTED: cuda || hip
3+
4+
// Test cancel fusion
5+
6+
#include <sycl/sycl.hpp>
7+
8+
using namespace sycl;
9+
10+
int main() {
11+
constexpr size_t dataSize = 512;
12+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
13+
14+
for (size_t i = 0; i < dataSize; ++i) {
15+
in1[i] = i * 2;
16+
in2[i] = i * 3;
17+
in3[i] = i * 4;
18+
tmp[i] = -1;
19+
out[i] = -1;
20+
}
21+
22+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
23+
24+
{
25+
buffer<int> bIn1{in1, range{dataSize}};
26+
buffer<int> bIn2{in2, range{dataSize}};
27+
buffer<int> bIn3{in3, range{dataSize}};
28+
buffer<int> bTmp{tmp, range{dataSize}};
29+
buffer<int> bOut{out, range{dataSize}};
30+
31+
ext::codeplay::experimental::fusion_wrapper fw{q};
32+
fw.start_fusion();
33+
34+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
35+
36+
q.submit([&](handler &cgh) {
37+
auto accIn1 = bIn1.get_access(cgh);
38+
auto accIn2 = bIn2.get_access(cgh);
39+
auto accTmp = bTmp.get_access(cgh);
40+
cgh.parallel_for<class KernelOne>(
41+
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
42+
});
43+
44+
q.submit([&](handler &cgh) {
45+
auto accTmp = bTmp.get_access(cgh);
46+
auto accIn3 = bIn3.get_access(cgh);
47+
auto accOut = bOut.get_access(cgh);
48+
cgh.parallel_for<class KernelTwo>(
49+
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
50+
});
51+
52+
fw.cancel_fusion();
53+
54+
assert(!fw.is_in_fusion_mode() &&
55+
"Queue should not be in fusion mode anymore");
56+
}
57+
58+
// Check the results
59+
for (size_t i = 0; i < dataSize; ++i) {
60+
assert(out[i] == (20 * i * i) && "Computation error");
61+
}
62+
63+
return 0;
64+
}

SYCL/KernelFusion/complete_fusion.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// UNSUPPORTED: cuda || hip
3+
4+
// Test complete fusion without any internalization
5+
6+
#include <sycl/sycl.hpp>
7+
8+
using namespace sycl;
9+
10+
int main() {
11+
constexpr size_t dataSize = 512;
12+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
13+
14+
for (size_t i = 0; i < dataSize; ++i) {
15+
in1[i] = i * 2;
16+
in2[i] = i * 3;
17+
in3[i] = i * 4;
18+
tmp[i] = -1;
19+
out[i] = -1;
20+
}
21+
22+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
23+
24+
{
25+
buffer<int> bIn1{in1, range{dataSize}};
26+
buffer<int> bIn2{in2, range{dataSize}};
27+
buffer<int> bIn3{in3, range{dataSize}};
28+
buffer<int> bTmp{tmp, range{dataSize}};
29+
buffer<int> bOut{out, range{dataSize}};
30+
31+
ext::codeplay::experimental::fusion_wrapper fw{q};
32+
fw.start_fusion();
33+
34+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
35+
36+
q.submit([&](handler &cgh) {
37+
auto accIn1 = bIn1.get_access(cgh);
38+
auto accIn2 = bIn2.get_access(cgh);
39+
auto accTmp = bTmp.get_access(cgh);
40+
cgh.parallel_for<class KernelOne>(
41+
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
42+
});
43+
44+
q.submit([&](handler &cgh) {
45+
auto accTmp = bTmp.get_access(cgh);
46+
auto accIn3 = bIn3.get_access(cgh);
47+
auto accOut = bOut.get_access(cgh);
48+
cgh.parallel_for<class KernelTwo>(
49+
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
50+
});
51+
52+
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
53+
54+
assert(!fw.is_in_fusion_mode() &&
55+
"Queue should not be in fusion mode anymore");
56+
}
57+
58+
// Check the results
59+
for (size_t i = 0; i < dataSize; ++i) {
60+
assert(out[i] == (20 * i * i) && "Computation error");
61+
}
62+
63+
return 0;
64+
}

0 commit comments

Comments
 (0)