Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][Fusion] Test if kernel fusion API compiles #1404

Merged
merged 1 commit into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions SYCL/KernelFusion/cancel_fusion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// UNSUPPORTED: cuda || hip

// Test cancel fusion

#include <sycl/sycl.hpp>

using namespace sycl;

int main() {
constexpr size_t dataSize = 512;
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];

for (size_t i = 0; i < dataSize; ++i) {
in1[i] = i * 2;
in2[i] = i * 3;
in3[i] = i * 4;
tmp[i] = -1;
out[i] = -1;
}

queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};

{
buffer<int> bIn1{in1, range{dataSize}};
buffer<int> bIn2{in2, range{dataSize}};
buffer<int> bIn3{in3, range{dataSize}};
buffer<int> bTmp{tmp, range{dataSize}};
buffer<int> bOut{out, range{dataSize}};

ext::codeplay::experimental::fusion_wrapper fw{q};
fw.start_fusion();

assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");

q.submit([&](handler &cgh) {
auto accIn1 = bIn1.get_access(cgh);
auto accIn2 = bIn2.get_access(cgh);
auto accTmp = bTmp.get_access(cgh);
cgh.parallel_for<class KernelOne>(
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
});

q.submit([&](handler &cgh) {
auto accTmp = bTmp.get_access(cgh);
auto accIn3 = bIn3.get_access(cgh);
auto accOut = bOut.get_access(cgh);
cgh.parallel_for<class KernelTwo>(
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
});

fw.cancel_fusion();

assert(!fw.is_in_fusion_mode() &&
"Queue should not be in fusion mode anymore");
}

// Check the results
for (size_t i = 0; i < dataSize; ++i) {
assert(out[i] == (20 * i * i) && "Computation error");
}

return 0;
}
64 changes: 64 additions & 0 deletions SYCL/KernelFusion/complete_fusion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// UNSUPPORTED: cuda || hip

// Test complete fusion without any internalization

#include <sycl/sycl.hpp>

using namespace sycl;

int main() {
constexpr size_t dataSize = 512;
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];

for (size_t i = 0; i < dataSize; ++i) {
in1[i] = i * 2;
in2[i] = i * 3;
in3[i] = i * 4;
tmp[i] = -1;
out[i] = -1;
}

queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};

{
buffer<int> bIn1{in1, range{dataSize}};
buffer<int> bIn2{in2, range{dataSize}};
buffer<int> bIn3{in3, range{dataSize}};
buffer<int> bTmp{tmp, range{dataSize}};
buffer<int> bOut{out, range{dataSize}};

ext::codeplay::experimental::fusion_wrapper fw{q};
fw.start_fusion();

assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");

q.submit([&](handler &cgh) {
auto accIn1 = bIn1.get_access(cgh);
auto accIn2 = bIn2.get_access(cgh);
auto accTmp = bTmp.get_access(cgh);
cgh.parallel_for<class KernelOne>(
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
});

q.submit([&](handler &cgh) {
auto accTmp = bTmp.get_access(cgh);
auto accIn3 = bIn3.get_access(cgh);
auto accOut = bOut.get_access(cgh);
cgh.parallel_for<class KernelTwo>(
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
});

fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});

assert(!fw.is_in_fusion_mode() &&
"Queue should not be in fusion mode anymore");
}

// Check the results
for (size_t i = 0; i < dataSize; ++i) {
assert(out[i] == (20 * i * i) && "Computation error");
}

return 0;
}