Skip to content

Commit 8d8d3f4

Browse files
EwanCreblejulianmiBensuo
authored
[SYCL][Graph] Throw an exception when unsupported features used in a graph (#10789)
This PR contains a set of changes that implement throwing an exception when a feature unsupported by [sycl_ext_oneapi_graph](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc) is used. An error is thrown when an application uses the following features with the graph extension: * An unsupported extension. * Querying the event returned from graph submission for profiling information. * Level Zero immediate command-list (see #10467) * Specialization constants, kernel bundles, or reductions are used in a graph node. ## Authors Co-authored-by: Pablo Reble <[email protected]> Co-authored-by: Julian Miller <[email protected]> Co-authored-by: Ben Tracy <[email protected]> Co-authored-by: Ewan Crawford <[email protected]> Co-authored-by: Maxime France-Pillois <[email protected]>
1 parent 8cf5033 commit 8d8d3f4

File tree

16 files changed

+1120
-28
lines changed

16 files changed

+1120
-28
lines changed

sycl/include/sycl/ext/oneapi/experimental/graph.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,44 @@ namespace oneapi {
3131
namespace experimental {
3232

3333
namespace detail {
34+
// List of sycl features and extensions which are not supported by graphs. Used
35+
// for throwing errors when these features are used with graphs.
36+
enum class UnsupportedGraphFeatures {
37+
sycl_reductions = 0,
38+
sycl_specialization_constants = 1,
39+
sycl_kernel_bundle = 2,
40+
sycl_ext_oneapi_kernel_properties = 3,
41+
sycl_ext_oneapi_enqueue_barrier = 4,
42+
sycl_ext_oneapi_memcpy2d = 5,
43+
sycl_ext_oneapi_device_global = 6,
44+
sycl_ext_oneapi_bindless_images = 7
45+
};
46+
47+
constexpr const char *
48+
UnsupportedFeatureToString(UnsupportedGraphFeatures Feature) {
49+
using UGF = UnsupportedGraphFeatures;
50+
switch (Feature) {
51+
case UGF::sycl_reductions:
52+
return "Reductions";
53+
case UGF::sycl_specialization_constants:
54+
return "Specialization Constants";
55+
case UGF::sycl_kernel_bundle:
56+
return "Kernel Bundles";
57+
case UGF::sycl_ext_oneapi_kernel_properties:
58+
return "sycl_ext_oneapi_kernel_properties";
59+
case UGF::sycl_ext_oneapi_enqueue_barrier:
60+
return "sycl_ext_oneapi_enqueue_barrier";
61+
case UGF::sycl_ext_oneapi_memcpy2d:
62+
return "sycl_ext_oneapi_memcpy2d";
63+
case UGF::sycl_ext_oneapi_device_global:
64+
return "sycl_ext_oneapi_device_global";
65+
case UGF::sycl_ext_oneapi_bindless_images:
66+
return "sycl_ext_oneapi_bindless_images";
67+
default:
68+
return {};
69+
}
70+
}
71+
3472
class node_impl;
3573
class graph_impl;
3674
class exec_graph_impl;

sycl/include/sycl/handler.hpp

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class pipe;
135135

136136
namespace ext::oneapi::experimental::detail {
137137
class graph_impl;
138-
}
138+
} // namespace ext::oneapi::experimental::detail
139139
namespace detail {
140140

141141
class handler_impl;
@@ -1578,6 +1578,10 @@ class __SYCL_EXPORT handler {
15781578
void set_specialization_constant(
15791579
typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {
15801580

1581+
throwIfGraphAssociated<
1582+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1583+
sycl_specialization_constants>();
1584+
15811585
setStateSpecConstSet();
15821586

15831587
std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
@@ -1592,6 +1596,10 @@ class __SYCL_EXPORT handler {
15921596
typename std::remove_reference_t<decltype(SpecName)>::value_type
15931597
get_specialization_constant() const {
15941598

1599+
throwIfGraphAssociated<
1600+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1601+
sycl_specialization_constants>();
1602+
15951603
if (isStateExplicitKernelBundle())
15961604
throw sycl::exception(make_error_code(errc::invalid),
15971605
"Specialization constants cannot be read after "
@@ -2107,6 +2115,7 @@ class __SYCL_EXPORT handler {
21072115
std::enable_if_t<
21082116
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21092117
single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) {
2118+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21102119
single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
21112120
KernelFunc);
21122121
}
@@ -2117,6 +2126,7 @@ class __SYCL_EXPORT handler {
21172126
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21182127
parallel_for(range<1> NumWorkItems, PropertiesT Props,
21192128
_KERNELFUNCPARAM(KernelFunc)) {
2129+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21202130
parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
21212131
NumWorkItems, Props, std::move(KernelFunc));
21222132
}
@@ -2127,6 +2137,7 @@ class __SYCL_EXPORT handler {
21272137
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21282138
parallel_for(range<2> NumWorkItems, PropertiesT Props,
21292139
_KERNELFUNCPARAM(KernelFunc)) {
2140+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21302141
parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
21312142
NumWorkItems, Props, std::move(KernelFunc));
21322143
}
@@ -2137,6 +2148,7 @@ class __SYCL_EXPORT handler {
21372148
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21382149
parallel_for(range<3> NumWorkItems, PropertiesT Props,
21392150
_KERNELFUNCPARAM(KernelFunc)) {
2151+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21402152
parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
21412153
NumWorkItems, Props, std::move(KernelFunc));
21422154
}
@@ -2147,6 +2159,7 @@ class __SYCL_EXPORT handler {
21472159
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21482160
parallel_for(nd_range<Dims> Range, PropertiesT Properties,
21492161
_KERNELFUNCPARAM(KernelFunc)) {
2162+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21502163
parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
21512164
}
21522165

@@ -2159,6 +2172,9 @@ class __SYCL_EXPORT handler {
21592172
detail::AreAllButLastReductions<RestT...>::value &&
21602173
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21612174
parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) {
2175+
throwIfGraphAssociated<ext::oneapi::experimental::detail::
2176+
UnsupportedGraphFeatures::sycl_reductions>();
2177+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21622178
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
21632179
std::forward<RestT>(Rest)...);
21642180
}
@@ -2170,6 +2186,9 @@ class __SYCL_EXPORT handler {
21702186
detail::AreAllButLastReductions<RestT...>::value &&
21712187
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21722188
parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) {
2189+
throwIfGraphAssociated<ext::oneapi::experimental::detail::
2190+
UnsupportedGraphFeatures::sycl_reductions>();
2191+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21732192
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
21742193
std::forward<RestT>(Rest)...);
21752194
}
@@ -2181,6 +2200,9 @@ class __SYCL_EXPORT handler {
21812200
detail::AreAllButLastReductions<RestT...>::value &&
21822201
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
21832202
parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) {
2203+
throwIfGraphAssociated<ext::oneapi::experimental::detail::
2204+
UnsupportedGraphFeatures::sycl_reductions>();
2205+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
21842206
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
21852207
std::forward<RestT>(Rest)...);
21862208
}
@@ -2216,6 +2238,8 @@ class __SYCL_EXPORT handler {
22162238
detail::AreAllButLastReductions<RestT...>::value &&
22172239
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
22182240
parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2241+
throwIfGraphAssociated<ext::oneapi::experimental::detail::
2242+
UnsupportedGraphFeatures::sycl_reductions>();
22192243
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
22202244
std::forward<RestT>(Rest)...);
22212245
}
@@ -2235,6 +2259,7 @@ class __SYCL_EXPORT handler {
22352259
int Dims, typename PropertiesT>
22362260
void parallel_for_work_group(range<Dims> NumWorkGroups, PropertiesT Props,
22372261
_KERNELFUNCPARAM(KernelFunc)) {
2262+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
22382263
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
22392264
PropertiesT>(NumWorkGroups, Props,
22402265
KernelFunc);
@@ -2245,6 +2270,7 @@ class __SYCL_EXPORT handler {
22452270
void parallel_for_work_group(range<Dims> NumWorkGroups,
22462271
range<Dims> WorkGroupSize, PropertiesT Props,
22472272
_KERNELFUNCPARAM(KernelFunc)) {
2273+
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
22482274
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
22492275
PropertiesT>(
22502276
NumWorkGroups, WorkGroupSize, Props, KernelFunc);
@@ -2552,6 +2578,9 @@ class __SYCL_EXPORT handler {
25522578
/// until all commands previously submitted to this queue have entered the
25532579
/// complete state.
25542580
void ext_oneapi_barrier() {
2581+
throwIfGraphAssociated<
2582+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2583+
sycl_ext_oneapi_enqueue_barrier>();
25552584
throwIfActionIsCreated();
25562585
setType(detail::CG::Barrier);
25572586
}
@@ -2637,6 +2666,9 @@ class __SYCL_EXPORT handler {
26372666
typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
26382667
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
26392668
size_t SrcPitch, size_t Width, size_t Height) {
2669+
throwIfGraphAssociated<
2670+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2671+
sycl_ext_oneapi_memcpy2d>();
26402672
throwIfActionIsCreated();
26412673
if (Width > DestPitch)
26422674
throw sycl::exception(sycl::make_error_code(errc::invalid),
@@ -2815,6 +2847,9 @@ class __SYCL_EXPORT handler {
28152847
void memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
28162848
const void *Src, size_t NumBytes = sizeof(T),
28172849
size_t DestOffset = 0) {
2850+
throwIfGraphAssociated<
2851+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2852+
sycl_ext_oneapi_device_global>();
28182853
if (sizeof(T) < DestOffset + NumBytes)
28192854
throw sycl::exception(make_error_code(errc::invalid),
28202855
"Copy to device_global is out of bounds.");
@@ -2847,6 +2882,9 @@ class __SYCL_EXPORT handler {
28472882
memcpy(void *Dest,
28482883
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
28492884
size_t NumBytes = sizeof(T), size_t SrcOffset = 0) {
2885+
throwIfGraphAssociated<
2886+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2887+
sycl_ext_oneapi_device_global>();
28502888
if (sizeof(T) < SrcOffset + NumBytes)
28512889
throw sycl::exception(make_error_code(errc::invalid),
28522890
"Copy from device_global is out of bounds.");
@@ -3368,8 +3406,34 @@ class __SYCL_EXPORT handler {
33683406
"handler::require() before it can be used.");
33693407
}
33703408

3409+
template <typename PropertiesT>
3410+
std::enable_if_t<
3411+
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
3412+
throwIfGraphAssociatedAndKernelProperties() const {
3413+
if (!std::is_same_v<PropertiesT,
3414+
ext::oneapi::experimental::detail::empty_properties_t>)
3415+
throwIfGraphAssociated<
3416+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3417+
sycl_ext_oneapi_kernel_properties>();
3418+
}
3419+
33713420
// Set value of the gpu cache configuration for the kernel.
33723421
void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig);
3422+
3423+
template <
3424+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT>
3425+
void throwIfGraphAssociated() const {
3426+
3427+
if (getCommandGraph()) {
3428+
std::string FeatureString =
3429+
ext::oneapi::experimental::detail::UnsupportedFeatureToString(
3430+
FeatureT);
3431+
throw sycl::exception(sycl::make_error_code(errc::invalid),
3432+
"The " + FeatureString +
3433+
" feature is not yet available "
3434+
"for use with the SYCL Graph extension.");
3435+
}
3436+
}
33733437
};
33743438
} // namespace _V1
33753439
} // namespace sycl

sycl/include/sycl/reduction.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1355,7 +1355,8 @@ struct NDRangeReduction<
13551355
sycl::atomic_ref<int, memory_order::acq_rel, memory_scope::device,
13561356
access::address_space::global_space>(
13571357
NWorkGroupsFinished[0]);
1358-
DoReducePartialSumsInLastWG[0] = ++NFinished == NWorkGroups;
1358+
DoReducePartialSumsInLastWG[0] =
1359+
++NFinished == static_cast<int>(NWorkGroups);
13591360
}
13601361

13611362
workGroupBarrier();

sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
663663
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
664664
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
665665
ur_event_handle_t *Event) {
666+
// There are issues with immediate command lists so return an error if the
667+
// queue is in that mode.
668+
if (Queue->UsingImmCmdLists) {
669+
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
670+
}
671+
666672
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
667673
// Use compute engine rather than copy engine
668674
const auto UseCopyEngine = false;

sycl/source/detail/event_impl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,11 @@ void event_impl::checkProfilingPreconditions() const {
278278
"Profiling information is unavailable as the queue associated with "
279279
"the event does not have the 'enable_profiling' property.");
280280
}
281+
if (MEventFromSubmitedExecCommandBuffer) {
282+
throw sycl::exception(make_error_code(sycl::errc::invalid),
283+
"Profiling information is unavailable for events "
284+
"returned by a graph submission.");
285+
}
281286
}
282287

283288
template <>

sycl/source/detail/event_impl.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,14 @@ class event_impl {
282282
return MGraph.lock();
283283
}
284284

285+
void setEventFromSubmitedExecCommandBuffer(bool value) {
286+
MEventFromSubmitedExecCommandBuffer = value;
287+
}
288+
289+
bool isEventFromSubmitedExecCommandBuffer() const {
290+
return MEventFromSubmitedExecCommandBuffer;
291+
}
292+
285293
protected:
286294
// When instrumentation is enabled emits trace event for event wait begin and
287295
// returns the telemetry event generated for the wait
@@ -332,6 +340,8 @@ class event_impl {
332340
/// Store the command graph associated with this event, if any.
333341
/// This event is also be stored in the graph so a weak_ptr is used.
334342
std::weak_ptr<ext::oneapi::experimental::detail::graph_impl> MGraph;
343+
/// Indicates that the event results from a command graph submission
344+
bool MEventFromSubmitedExecCommandBuffer = false;
335345

336346
// If this event represents a submission to a
337347
// sycl::detail::pi::PiExtCommandBuffer the sync point for that submission is

sycl/source/detail/fusion/fusion_wrapper_impl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ bool fusion_wrapper_impl::is_in_fusion_mode() const {
2727
}
2828

2929
void fusion_wrapper_impl::start_fusion() {
30+
if (MQueue->getCommandGraph()) {
31+
throw sycl::exception(sycl::make_error_code(errc::invalid),
32+
"SYCL kernel fusion can NOT be started "
33+
"on a queue that is in a recording state.");
34+
}
3035
detail::Scheduler::getInstance().startFusion(MQueue);
3136
}
3237

sycl/source/detail/graph_impl.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
372372
auto NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
373373
NewEvent->setContextImpl(Queue->getContextImplPtr());
374374
NewEvent->setStateIncomplete();
375+
NewEvent->setEventFromSubmitedExecCommandBuffer(true);
375376
return NewEvent;
376377
});
377378

@@ -395,7 +396,14 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
395396
->call_nocheck<
396397
sycl::detail::PiApiKind::piextEnqueueCommandBuffer>(
397398
CommandBuffer, Queue->getHandleRef(), 0, nullptr, OutEvent);
398-
if (Res != pi_result::PI_SUCCESS) {
399+
if (Res == pi_result::PI_ERROR_INVALID_QUEUE_PROPERTIES) {
400+
throw sycl::exception(
401+
make_error_code(errc::invalid),
402+
"Graphs cannot be submitted to a queue which uses "
403+
"immediate command lists. Use "
404+
"sycl::ext::intel::property::queue::no_immediate_"
405+
"command_list to disable them.");
406+
} else if (Res != pi_result::PI_SUCCESS) {
399407
throw sycl::exception(
400408
errc::event,
401409
"Failed to enqueue event for command buffer submission");
@@ -509,6 +517,12 @@ modifiable_command_graph::finalize(const sycl::property_list &) const {
509517
bool modifiable_command_graph::begin_recording(queue &RecordingQueue) {
510518
auto QueueImpl = sycl::detail::getSyclObjImpl(RecordingQueue);
511519

520+
if (QueueImpl->is_in_fusion_mode()) {
521+
throw sycl::exception(sycl::make_error_code(errc::invalid),
522+
"SYCL queue in kernel in fusion mode "
523+
"can NOT be recorded.");
524+
}
525+
512526
if (QueueImpl->get_context() != impl->getContext()) {
513527
throw sycl::exception(sycl::make_error_code(errc::invalid),
514528
"begin_recording called for a queue whose context "

0 commit comments

Comments
 (0)