Skip to content

Commit 6376df2

Browse files
authored
[SYCL][Fusion] Test kernel fusion and optimization (intel#1535)
Test different scenarios for kernel fusion, including creation of the fused kernel by the JIT compiler and performance optimizations such as dataflow internalization.

Automatically detect availability of the kernel fusion extension in the DPC++ build in `lit.cfg.py` and make it available for `REQUIRES` clauses.

Spec: intel#7098
Implementation: intel#7831

Signed-off-by: Lukas Sommer <[email protected]>
1 parent 08612b7 commit 6376df2

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

SYCL/KernelFusion/complete_fusion.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// UNSUPPORTED: cuda || hip
5+
// REQUIRES: fusion
6+
7+
// Test complete fusion without any internalization
8+
9+
#include <sycl/sycl.hpp>
10+
11+
using namespace sycl;
12+
13+
int main() {
14+
constexpr size_t dataSize = 512;
15+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
16+
17+
for (size_t i = 0; i < dataSize; ++i) {
18+
in1[i] = i * 2;
19+
in2[i] = i * 3;
20+
in3[i] = i * 4;
21+
tmp[i] = -1;
22+
out[i] = -1;
23+
}
24+
25+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
26+
27+
{
28+
buffer<int> bIn1{in1, range{dataSize}};
29+
buffer<int> bIn2{in2, range{dataSize}};
30+
buffer<int> bIn3{in3, range{dataSize}};
31+
buffer<int> bTmp{tmp, range{dataSize}};
32+
buffer<int> bOut{out, range{dataSize}};
33+
34+
ext::codeplay::experimental::fusion_wrapper fw{q};
35+
fw.start_fusion();
36+
37+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
38+
39+
q.submit([&](handler &cgh) {
40+
auto accIn1 = bIn1.get_access(cgh);
41+
auto accIn2 = bIn2.get_access(cgh);
42+
auto accTmp = bTmp.get_access(cgh);
43+
cgh.parallel_for<class KernelOne>(
44+
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
45+
});
46+
47+
q.submit([&](handler &cgh) {
48+
auto accTmp = bTmp.get_access(cgh);
49+
auto accIn3 = bIn3.get_access(cgh);
50+
auto accOut = bOut.get_access(cgh);
51+
cgh.parallel_for<class KernelTwo>(
52+
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
53+
});
54+
55+
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
56+
57+
assert(!fw.is_in_fusion_mode() &&
58+
"Queue should not be in fusion mode anymore");
59+
}
60+
61+
// Check the results
62+
for (size_t i = 0; i < dataSize; ++i) {
63+
assert(out[i] == (20 * i * i) && "Computation error");
64+
}
65+
66+
return 0;
67+
}

SYCL/lit.cfg.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,23 @@
427427
else:
428428
lit_config.warning("Couldn't find pre-installed AOT device compiler " + aot_tool)
429429

430+
# Check if kernel fusion is available by compiling a small program that will
# be ill-formed (compilation stops with non-zero exit code) if the feature
# test macro for kernel fusion is not defined.
#
# The probe source and the compiler output are placed in a throw-away
# directory, so configuring the test suite neither requires the current
# working directory to be writable nor leaves check_fusion.cpp or a stray
# executable behind in it.
import os
import tempfile

check_fusion_file = 'check_fusion.cpp'
with tempfile.TemporaryDirectory() as fusion_check_dir:
    fusion_check_src = os.path.join(fusion_check_dir, check_fusion_file)
    fusion_check_out = os.path.join(fusion_check_dir, 'check_fusion.out')
    with open(fusion_check_src, 'w') as ff:
        ff.write('#include <sycl/sycl.hpp>\n')
        ff.write('#ifndef SYCL_EXT_CODEPLAY_KERNEL_FUSION\n')
        ff.write('#error \"Feature test for fusion failed\"\n')
        ff.write('#endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION\n')
        ff.write('int main() { return 0; }\n')

    # The command line is interpreted by the shell, so both paths are
    # double-quoted in case the temporary directory contains spaces.
    status = subprocess.getstatusoutput(
        config.dpcpp_compiler + ' -fsycl "' + fusion_check_src +
        '" -o "' + fusion_check_out + '"')

# A zero exit status means the feature test macro was defined.
if status[0] == 0:
    lit_config.note('Kernel fusion extension enabled')
    config.available_features.add('fusion')
446+
430447
# Set timeout for a single test
431448
try:
432449
import psutil

0 commit comments

Comments
 (0)