Skip to content

Commit 39db079

Browse files
[SYCL][Graph] Enable use of kernel properties (#12062)
Allows the use of sycl_ext_oneapi_kernel_properties extension in Sycl-Graph. Kernel properties passed to the backend using the sycl kernel object. Therefore, the graph implementation does not prevent the backend from accessing these properties. Removes exception throwing. Adds tests.
1 parent 4168793 commit 39db079

File tree

9 files changed

+422
-188
lines changed

9 files changed

+422
-188
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,11 +1292,8 @@ which is layered ontop of `sycl_ext_oneapi_graph`.
12921292

12931293
The new handler methods, and queue shortcuts, defined by
12941294
link:../experimental/sycl_ext_oneapi_kernel_properties.asciidoc[sycl_ext_oneapi_kernel_properties]
1295-
cannot be used in graph nodes. A synchronous exception will be thrown with error
1296-
code `invalid` if a user tries to add them to a graph.
1297-
1298-
Removing this restriction is something we may look at for future revisions of
1299-
`sycl_ext_oneapi_graph`.
1295+
can be used in graph nodes in the same way as they are used in normal queue
1296+
submission.
13001297

13011298
==== sycl_ext_oneapi_prod
13021299

sycl/include/sycl/handler.hpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,7 +2318,6 @@ class __SYCL_EXPORT handler {
23182318
std::enable_if_t<
23192319
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
23202320
single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) {
2321-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23222321
single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
23232322
KernelFunc);
23242323
}
@@ -2329,7 +2328,6 @@ class __SYCL_EXPORT handler {
23292328
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
23302329
parallel_for(range<1> NumWorkItems, PropertiesT Props,
23312330
_KERNELFUNCPARAM(KernelFunc)) {
2332-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23332331
parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
23342332
NumWorkItems, Props, std::move(KernelFunc));
23352333
}
@@ -2340,7 +2338,6 @@ class __SYCL_EXPORT handler {
23402338
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
23412339
parallel_for(range<2> NumWorkItems, PropertiesT Props,
23422340
_KERNELFUNCPARAM(KernelFunc)) {
2343-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23442341
parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
23452342
NumWorkItems, Props, std::move(KernelFunc));
23462343
}
@@ -2351,7 +2348,6 @@ class __SYCL_EXPORT handler {
23512348
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
23522349
parallel_for(range<3> NumWorkItems, PropertiesT Props,
23532350
_KERNELFUNCPARAM(KernelFunc)) {
2354-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23552351
parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
23562352
NumWorkItems, Props, std::move(KernelFunc));
23572353
}
@@ -2362,7 +2358,6 @@ class __SYCL_EXPORT handler {
23622358
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
23632359
parallel_for(nd_range<Dims> Range, PropertiesT Properties,
23642360
_KERNELFUNCPARAM(KernelFunc)) {
2365-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23662361
parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
23672362
}
23682363

@@ -2377,7 +2372,6 @@ class __SYCL_EXPORT handler {
23772372
parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) {
23782373
throwIfGraphAssociated<ext::oneapi::experimental::detail::
23792374
UnsupportedGraphFeatures::sycl_reductions>();
2380-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23812375
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
23822376
std::forward<RestT>(Rest)...);
23832377
}
@@ -2391,7 +2385,6 @@ class __SYCL_EXPORT handler {
23912385
parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) {
23922386
throwIfGraphAssociated<ext::oneapi::experimental::detail::
23932387
UnsupportedGraphFeatures::sycl_reductions>();
2394-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
23952388
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
23962389
std::forward<RestT>(Rest)...);
23972390
}
@@ -2405,7 +2398,6 @@ class __SYCL_EXPORT handler {
24052398
parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) {
24062399
throwIfGraphAssociated<ext::oneapi::experimental::detail::
24072400
UnsupportedGraphFeatures::sycl_reductions>();
2408-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
24092401
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
24102402
std::forward<RestT>(Rest)...);
24112403
}
@@ -2462,7 +2454,6 @@ class __SYCL_EXPORT handler {
24622454
int Dims, typename PropertiesT>
24632455
void parallel_for_work_group(range<Dims> NumWorkGroups, PropertiesT Props,
24642456
_KERNELFUNCPARAM(KernelFunc)) {
2465-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
24662457
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
24672458
PropertiesT>(NumWorkGroups, Props,
24682459
KernelFunc);
@@ -2473,7 +2464,6 @@ class __SYCL_EXPORT handler {
24732464
void parallel_for_work_group(range<Dims> NumWorkGroups,
24742465
range<Dims> WorkGroupSize, PropertiesT Props,
24752466
_KERNELFUNCPARAM(KernelFunc)) {
2476-
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
24772467
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
24782468
PropertiesT>(
24792469
NumWorkGroups, WorkGroupSize, Props, KernelFunc);
@@ -3622,17 +3612,6 @@ class __SYCL_EXPORT handler {
36223612
"handler::require() before it can be used.");
36233613
}
36243614

3625-
template <typename PropertiesT>
3626-
std::enable_if_t<
3627-
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
3628-
throwIfGraphAssociatedAndKernelProperties() const {
3629-
if (!std::is_same_v<PropertiesT,
3630-
ext::oneapi::experimental::empty_properties_t>)
3631-
throwIfGraphAssociated<
3632-
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3633-
sycl_ext_oneapi_kernel_properties>();
3634-
}
3635-
36363615
// Set value of the gpu cache configuration for the kernel.
36373616
void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig);
36383617

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if ext_oneapi_level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s %}
5+
//
6+
// CHECK-NOT: LEAK
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../Inputs/sub_group_prop.cpp"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if ext_oneapi_level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s %}
5+
//
6+
// CHECK-NOT: LEAK
7+
8+
// Temporarily disabled for CUDA.
9+
// XFAIL: cuda
10+
// Note: failing negative test with HIP in the original test
11+
// TODO: disable hip when HIP backend will be supported by Graph
12+
13+
#define GRAPH_E2E_EXPLICIT
14+
15+
#include "../Inputs/work_group_size_prop.cpp"
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// This test is adapted from "test-e2e/Basic/sub_group_size_prop.cpp"
2+
3+
#include "../graph_common.hpp"
4+
5+
enum class Variant { Function, Functor, FunctorAndProperty };
6+
7+
template <Variant KernelVariant, size_t SGSize> class SubGroupKernel;
8+
9+
template <size_t SGSize> struct KernelFunctorWithSGSizeProp {
10+
accessor<size_t, 1, access_mode::write> Acc;
11+
12+
KernelFunctorWithSGSizeProp(accessor<size_t, 1, access_mode::write> Acc)
13+
: Acc(Acc) {}
14+
15+
void operator()(nd_item<1> NdItem) const {
16+
auto SG = NdItem.get_sub_group();
17+
if (NdItem.get_global_linear_id() == 0)
18+
Acc[0] = SG.get_local_linear_range();
19+
}
20+
21+
auto get(sycl::ext::oneapi::experimental::properties_tag) {
22+
return sycl::ext::oneapi::experimental::properties{
23+
sycl::ext::oneapi::experimental::sub_group_size<SGSize>};
24+
}
25+
};
26+
27+
template <size_t SGSize>
28+
void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
29+
std::cout << "Testing sub_group_size property for sub-group size=" << SGSize
30+
<< std::endl;
31+
32+
auto SGSizeSupported =
33+
std::find(SupportedSGSizes.begin(), SupportedSGSizes.end(), SGSize) !=
34+
SupportedSGSizes.end();
35+
if (!SGSizeSupported) {
36+
std::cout << "Sub-group size " << SGSize
37+
<< " is not supported on the device." << std::endl;
38+
return;
39+
}
40+
41+
auto Props = ext::oneapi::experimental::properties{
42+
ext::oneapi::experimental::sub_group_size<SGSize>};
43+
44+
nd_range<1> NdRange(SGSize * 4, SGSize * 2);
45+
46+
size_t ReadSubGroupSize = 0;
47+
{
48+
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
49+
ReadSubGroupSizeBuf.set_write_back(false);
50+
51+
{
52+
exp_ext::command_graph Graph{
53+
Queue.get_context(),
54+
Queue.get_device(),
55+
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
56+
57+
add_node(Graph, Queue, [&](handler &CGH) {
58+
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
59+
sycl::write_only, sycl::no_init};
60+
61+
CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
62+
NdRange, Props, [=](nd_item<1> NdItem) {
63+
auto SG = NdItem.get_sub_group();
64+
if (NdItem.get_global_linear_id() == 0)
65+
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
66+
});
67+
});
68+
69+
auto ExecGraph = Graph.finalize();
70+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
71+
Queue.wait_and_throw();
72+
}
73+
74+
host_accessor HostAcc(ReadSubGroupSizeBuf);
75+
ReadSubGroupSize = HostAcc[0];
76+
}
77+
assert(ReadSubGroupSize == SGSize && "Failed check for function.");
78+
79+
ReadSubGroupSize = 0;
80+
{
81+
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
82+
ReadSubGroupSizeBuf.set_write_back(false);
83+
84+
{
85+
exp_ext::command_graph Graph{
86+
Queue.get_context(),
87+
Queue.get_device(),
88+
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
89+
90+
add_node(Graph, Queue, [&](handler &CGH) {
91+
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
92+
sycl::write_only, sycl::no_init};
93+
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{
94+
ReadSubGroupSizeBufAcc};
95+
96+
CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(
97+
NdRange, KernelFunctor);
98+
});
99+
100+
auto ExecGraph = Graph.finalize();
101+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
102+
Queue.wait_and_throw();
103+
}
104+
105+
host_accessor HostAcc(ReadSubGroupSizeBuf);
106+
ReadSubGroupSize = HostAcc[0];
107+
}
108+
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");
109+
110+
ReadSubGroupSize = 0;
111+
{
112+
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
113+
ReadSubGroupSizeBuf.set_write_back(false);
114+
115+
{
116+
exp_ext::command_graph Graph{
117+
Queue.get_context(),
118+
Queue.get_device(),
119+
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
120+
121+
add_node(Graph, Queue, [&](handler &CGH) {
122+
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
123+
sycl::write_only, sycl::no_init};
124+
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{
125+
ReadSubGroupSizeBufAcc};
126+
127+
CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(
128+
NdRange, Props, KernelFunctor);
129+
});
130+
131+
auto ExecGraph = Graph.finalize();
132+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
133+
Queue.wait_and_throw();
134+
}
135+
136+
host_accessor HostAcc(ReadSubGroupSizeBuf);
137+
ReadSubGroupSize = HostAcc[0];
138+
}
139+
assert(ReadSubGroupSize == SGSize &&
140+
"Failed check for functor and properties.");
141+
}
142+
143+
int main() {
144+
queue Queue({sycl::ext::intel::property::queue::no_immediate_command_list{}});
145+
146+
if (!are_graphs_supported(Queue)) {
147+
return 0;
148+
}
149+
150+
std::vector<size_t> SupportedSGSizes =
151+
Queue.get_device().get_info<info::device::sub_group_sizes>();
152+
153+
test<1>(Queue, SupportedSGSizes);
154+
test<8>(Queue, SupportedSGSizes);
155+
test<16>(Queue, SupportedSGSizes);
156+
test<32>(Queue, SupportedSGSizes);
157+
test<64>(Queue, SupportedSGSizes);
158+
159+
return 0;
160+
}

0 commit comments

Comments
 (0)