Skip to content

[SYCL][Graph] Enable specialization constants with graph #11556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1776,10 +1776,6 @@ class __SYCL_EXPORT handler {
void set_specialization_constant(
typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {

throwIfGraphAssociated<
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
sycl_specialization_constants>();

setStateSpecConstSet();

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
Expand All @@ -1794,10 +1790,6 @@ class __SYCL_EXPORT handler {
typename std::remove_reference_t<decltype(SpecName)>::value_type
get_specialization_constant() const {

throwIfGraphAssociated<
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
sycl_specialization_constants>();

if (isStateExplicitKernelBundle())
throw sycl::exception(make_error_code(errc::invalid),
"Specialization constants cannot be read after "
Expand Down
15 changes: 7 additions & 8 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2405,6 +2405,7 @@ pi_int32 enqueueImpCommandBufferKernel(
pi_kernel PiKernel = nullptr;
std::mutex *KernelMutex = nullptr;
pi_program PiProgram = nullptr;
std::shared_ptr<device_image_impl> DeviceImageImpl = nullptr;

auto Kernel = CommandGroup.MSyclKernel;
auto KernelBundleImplPtr = CommandGroup.MKernelBundle;
Expand All @@ -2417,7 +2418,6 @@ pi_int32 enqueueImpCommandBufferKernel(
// they can simply be launched directly.
if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) {
std::shared_ptr<kernel_impl> SyclKernelImpl;
std::shared_ptr<device_image_impl> DeviceImageImpl;
auto KernelName = CommandGroup.MKernelName;
kernel_id KernelID =
detail::ProgramManager::getInstance().getSYCLKernelID(KernelName);
Expand All @@ -2439,13 +2439,12 @@ pi_int32 enqueueImpCommandBufferKernel(
ContextImpl, DeviceImpl, CommandGroup.MKernelName);
}

auto SetFunc = [&Plugin, &PiKernel, &Ctx, &getMemAllocationFunc](
sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) {
sycl::detail::SetArgBasedOnType(
Plugin, PiKernel,
nullptr /* TODO: Handle spec constants and pass device image here */
,
getMemAllocationFunc, Ctx, false, Arg, NextTrueIndex);
auto SetFunc = [&Plugin, &PiKernel, &DeviceImageImpl, &Ctx,
&getMemAllocationFunc](sycl::detail::ArgDesc &Arg,
size_t NextTrueIndex) {
sycl::detail::SetArgBasedOnType(Plugin, PiKernel, DeviceImageImpl,
getMemAllocationFunc, Ctx, false, Arg,
NextTrueIndex);
};
// Copy args for modification
auto Args = CommandGroup.MArgs;
Expand Down
16 changes: 8 additions & 8 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,10 @@ bool handler::isStateExplicitKernelBundle() const {
std::shared_ptr<detail::kernel_bundle_impl>
handler::getOrInsertHandlerKernelBundle(bool Insert) const {
if (!MImpl->MKernelBundle && Insert) {
MImpl->MKernelBundle =
detail::getSyclObjImpl(get_kernel_bundle<bundle_state::input>(
MQueue->get_context(), {MQueue->get_device()}, {}));
auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
MImpl->MKernelBundle = detail::getSyclObjImpl(
get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
}
return MImpl->MKernelBundle;
}
Expand Down Expand Up @@ -179,10 +180,10 @@ event handler::finalize() {
// Make sure implicit non-interop kernel bundles have the kernel
if (!KernelBundleImpPtr->isInterop() &&
!MImpl->isStateExplicitKernelBundle()) {
auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
kernel_id KernelID =
detail::ProgramManager::getInstance().getSYCLKernelID(MKernelName);
bool KernelInserted =
KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
// If kernel was not inserted and the bundle is in input mode we try
// building it and trying to find the kernel in executable mode
if (!KernelInserted &&
Expand All @@ -194,8 +195,7 @@ event handler::finalize() {
build(KernelBundle);
KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
setHandlerKernelBundle(KernelBundleImpPtr);
KernelInserted =
KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
}
// If the kernel was not found in executable mode we throw an exception
if (!KernelInserted)
Expand Down Expand Up @@ -835,7 +835,7 @@ void handler::verifyUsedKernelBundle(const std::string &KernelName) {

kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
device Dev =
(MGraph) ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
throw sycl::exception(
make_error_code(errc::kernel_not_supported),
Expand Down
16 changes: 16 additions & 0 deletions sycl/test-e2e/Graph/Explicit/spec_constants_handler_api.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// REQUIRES: cuda || level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using ZE_DEBUG
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
//
// CHECK-NOT: LEAK

// The following limitation is not restricted to Sycl-Graph
// but comes from the orignal test : `SpecConstants/2020/handler-api.cpp`
// FIXME: ACC devices use emulation path, which is not yet supported
// UNSUPPORTED: accelerator

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/spec_constants_handler_api.cpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// REQUIRES: cuda || level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using ZE_DEBUG
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
//
// CHECK-NOT: LEAK

// The following limitation is not restricted to Sycl-Graph
// but comes from the orignal test : `SpecConstants/2020/kernel-bundle-api.cpp`
// FIXME: ACC devices use emulation path, which is not yet supported
// UNSUPPORTED: accelerator
// UNSUPPORTED: hip

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/spec_constants_kernel_bundle_api.cpp"
210 changes: 210 additions & 0 deletions sycl/test-e2e/Graph/Inputs/spec_constants_handler_api.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// This test is intended to check basic operations with SYCL 2020 specialization
// constants using Graph and sycl::handler and sycl::kernel_handler APIs
// This test was taken from `SpecConstants/2020/handler-api.cpp`.
// Variable names have been changed to meet PascalCase naming convention
// requirements.

#include "../graph_common.hpp"

constexpr sycl::specialization_id<int> IntId;
constexpr sycl::specialization_id<int> IntId2(2);
constexpr sycl::specialization_id<float> FloatId(3.14);

class TestDefaultValuesKernel;
class EmptyKernel;
class TestSetAndGetOnDevice;

bool test_default_values(sycl::queue Queue);
bool test_set_and_get_on_host(sycl::queue Queue);
bool test_set_and_get_on_device(sycl::queue Queue);

bool test_set_and_get_on_device(sycl::queue Queue);

int main() {
auto ExceptionHandler = [&](sycl::exception_list Exceptions) {
for (std::exception_ptr const &E : Exceptions) {
try {
std::rethrow_exception(E);
} catch (sycl::exception const &E) {
std::cout << "An async SYCL exception was caught: " << E.what()
<< std::endl;
std::exit(1);
}
}
};

queue Queue{ExceptionHandler,
{sycl::ext::intel::property::queue::no_immediate_command_list{}}};

unsigned Errors = 0;
if (!test_default_values(Queue)) {
std::cout << "Test for default values of specialization constants failed!"
<< std::endl;
Errors++;
}

if (!test_set_and_get_on_host(Queue)) {
std::cout << "Test for set and get API on host failed!" << std::endl;
Errors++;
}

if (!test_set_and_get_on_device(Queue)) {
std::cout << "Test for set and get API on device failed!" << std::endl;
Errors++;
}

return (Errors == 0) ? 0 : 1;
};

bool test_default_values(sycl::queue Queue) {
sycl::buffer<int> IntBuffer(1);
IntBuffer.set_write_back(false);
sycl::buffer<int> IntBuffer2(1);
IntBuffer2.set_write_back(false);
sycl::buffer<float> FloatBuffer(1);
FloatBuffer.set_write_back(false);

{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, ([&](sycl::handler &CGH) {
auto IntAcc =
IntBuffer.get_access<sycl::access::mode::write>(CGH);
auto IntAcc2 =
IntBuffer2.get_access<sycl::access::mode::write>(CGH);
auto FloatAcc =
FloatBuffer.get_access<sycl::access::mode::write>(CGH);

CGH.single_task<TestDefaultValuesKernel>(
[=](sycl::kernel_handler KH) {
IntAcc[0] = KH.get_specialization_constant<IntId>();
IntAcc2[0] = KH.get_specialization_constant<IntId2>();
FloatAcc[0] = KH.get_specialization_constant<FloatId>();
});
}));

auto GraphExec = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
Queue.wait_and_throw();
}

unsigned Errors = 0;
sycl::host_accessor IntAcc(IntBuffer, sycl::read_only);
if (!check_value(
0, IntAcc[0],
"integer specialization constant (defined without default value)"))
Errors++;

sycl::host_accessor IntAcc2(IntBuffer2, sycl::read_only);
if (!check_value(2, IntAcc2[0], "integer specialization constant"))
Errors++;

sycl::host_accessor FloatAcc(FloatBuffer, sycl::read_only);
if (!check_value(3.14f, FloatAcc[0], "float specialization constant"))
Errors++;

return Errors == 0;
}

bool test_set_and_get_on_host(sycl::queue Queue) {
unsigned Errors = 0;

exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(
Graph, Queue, ([&](sycl::handler &CGH) {
if (!check_value(
0, CGH.get_specialization_constant<IntId>(),
"integer specializaiton constant before setting any value"))
++Errors;

if (!check_value(
3.14f, CGH.get_specialization_constant<FloatId>(),
"float specializaiton constant before setting any value"))
++Errors;

int NewIntValue = 8;
float NewFloatValue = 3.0f;
CGH.set_specialization_constant<IntId>(NewIntValue);
CGH.set_specialization_constant<FloatId>(NewFloatValue);

if (!check_value(
NewIntValue, CGH.get_specialization_constant<IntId>(),
"integer specializaiton constant after setting a new value"))
++Errors;

if (!check_value(
NewFloatValue, CGH.get_specialization_constant<FloatId>(),
"float specializaiton constant after setting a new value"))
++Errors;

CGH.single_task<EmptyKernel>([=]() {});
}));

return Errors == 0;
}

bool test_set_and_get_on_device(sycl::queue Queue) {
sycl::buffer<int> IntBuffer(1);
IntBuffer.set_write_back(false);
sycl::buffer<int> IntBuffer2(1);
IntBuffer2.set_write_back(false);
sycl::buffer<float> FloatBuffer(1);
FloatBuffer.set_write_back(false);

int NewIntValue = 8;
int NewIntValue2 = 0;
float NewFloatValue = 3.0f;

{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(
Graph, Queue, ([&](sycl::handler &CGH) {
auto IntAcc = IntBuffer.get_access<sycl::access::mode::write>(CGH);
auto IntAcc2 = IntBuffer2.get_access<sycl::access::mode::write>(CGH);
auto FloatAcc =
FloatBuffer.get_access<sycl::access::mode::write>(CGH);

CGH.set_specialization_constant<IntId>(NewIntValue);
CGH.set_specialization_constant<IntId2>(NewIntValue2);
CGH.set_specialization_constant<FloatId>(NewFloatValue);

CGH.single_task<TestSetAndGetOnDevice>([=](sycl::kernel_handler KH) {
IntAcc[0] = KH.get_specialization_constant<IntId>();
IntAcc2[0] = KH.get_specialization_constant<IntId2>();
FloatAcc[0] = KH.get_specialization_constant<FloatId>();
});
}));

auto GraphExec = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
Queue.wait_and_throw();
}

unsigned Errors = 0;
sycl::host_accessor IntAcc(IntBuffer, sycl::read_only);
if (!check_value(NewIntValue, IntAcc[0], "integer specialization constant"))
Errors++;

sycl::host_accessor IntAcc2(IntBuffer2, sycl::read_only);
if (!check_value(NewIntValue2, IntAcc2[0], "integer specialization constant"))
Errors++;

sycl::host_accessor FloatAcc(FloatBuffer, sycl::read_only);
if (!check_value(NewFloatValue, FloatAcc[0], "float specialization constant"))
Errors++;

return Errors == 0;
}
Loading