Skip to content

Commit b0212c3

Browse files
[SYCL] Honor dependencies of empty command groups (#16203)
This is a recommit of #16180. Post-commit CI had a few tests failing so the original PR was reverted. The reason of the failures is that with proper dependencies tracking we now try to actually create images on the device and it might fail if device isn't capable of that. This updated version of the PR also modifies those failing tests to skip checks on unsupported devices Co-authored-by: Sergey Semenov <[email protected]>
1 parent 530354f commit b0212c3

File tree

5 files changed

+89
-97
lines changed

5 files changed

+89
-97
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3592,10 +3592,21 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35923592

35933593
return UR_RESULT_SUCCESS;
35943594
}
3595-
case CGType::None:
3596-
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
3597-
"CG type not implemented. " +
3598-
codeToString(UR_RESULT_ERROR_INVALID_OPERATION));
3595+
case CGType::None: {
3596+
if (RawEvents.empty()) {
3597+
// urEnqueueEventsWait with zero events acts like a barrier which is NOT
3598+
// what we want here. On the other hand, there is nothing to wait for, so
3599+
// we don't need to enqueue anything.
3600+
return UR_RESULT_SUCCESS;
3601+
}
3602+
const detail::AdapterPtr &Adapter = MQueue->getAdapter();
3603+
ur_event_handle_t Event;
3604+
ur_result_t Result = Adapter->call_nocheck<UrApiKind::urEnqueueEventsWait>(
3605+
MQueue->getHandleRef(), RawEvents.size(),
3606+
RawEvents.size() ? &RawEvents[0] : nullptr, &Event);
3607+
MEvent->setHandle(Event);
3608+
return Result;
3609+
}
35993610
}
36003611
return UR_RESULT_ERROR_INVALID_OPERATION;
36013612
}

sycl/source/handler.cpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -496,21 +496,8 @@ event handler::finalize() {
496496
MCodeLoc));
497497
break;
498498
case detail::CGType::None:
499-
if (detail::ur::trace(detail::ur::TraceLevel::TRACE_ALL)) {
500-
std::cout << "WARNING: An empty command group is submitted." << std::endl;
501-
}
502-
503-
// Empty nodes are handled by Graph like standard nodes
504-
// For Standard mode (non-graph),
505-
// empty nodes are not sent to the scheduler to save time
506-
if (impl->MGraph || (MQueue && MQueue->getCommandGraph())) {
507-
CommandGroup.reset(new detail::CG(detail::CGType::None,
508-
std::move(impl->CGData), MCodeLoc));
509-
} else {
510-
detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
511-
MLastEvent = detail::createSyclObjFromImpl<event>(Event);
512-
return MLastEvent;
513-
}
499+
CommandGroup.reset(new detail::CG(detail::CGType::None,
500+
std::move(impl->CGData), MCodeLoc));
514501
break;
515502
}
516503

sycl/test-e2e/Basic/empty_command.cpp

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,11 @@ void test_host_task_dep() {
2626
auto empty_cg_event =
2727
q.submit([&](handler &cgh) { cgh.depends_on(host_event); });
2828

29-
// FIXME: This should deadlock, but the dependency is ignored currently.
30-
empty_cg_event.wait();
31-
3229
assert(x == 0);
3330
start_execution.count_down();
3431

3532
empty_cg_event.wait();
36-
// FIXME: uncomment once the bug mentioned above is fixed.
37-
// assert(x == 42);
38-
39-
// I'm seeing some weird hang without this:
40-
host_event.wait();
33+
assert(x == 42);
4134
}
4235

4336
void test_device_event_dep() {
@@ -53,17 +46,12 @@ void test_device_event_dep() {
5346
auto empty_cg_event =
5447
q.submit([&](handler &cgh) { cgh.depends_on(device_event); });
5548

56-
// FIXME: This should deadlock, but the dependency is ignored currently.
57-
empty_cg_event.wait();
58-
5949
assert(*p == 0);
6050
start_execution.count_down();
6151

6252
empty_cg_event.wait();
63-
// FIXME: uncomment once the bug mentioned above is fixed.
64-
// assert(*p == 42);
53+
assert(*p == 42);
6554

66-
q.wait();
6755
sycl::free(p, q);
6856
}
6957

@@ -90,17 +78,12 @@ void test_accessor_dep() {
9078
auto empty_cg_event =
9179
q.submit([&](handler &cgh) { sycl::accessor a{b, cgh}; });
9280

93-
// FIXME: This should deadlock, but the dependency is ignored currently.
94-
empty_cg_event.wait();
95-
9681
assert(*p == 0);
9782
start_execution.count_down();
9883

9984
empty_cg_event.wait();
100-
// FIXME: uncomment once the bug mentioned above is fixed.
101-
// assert(*p == 42);
85+
assert(*p == 42);
10286

103-
q.wait();
10487
sycl::free(p, q);
10588
}
10689

sycl/test-e2e/WeakObject/weak_object_utils.hpp

Lines changed: 68 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -104,21 +104,6 @@ template <template <typename> typename CallableT> void runTest(sycl::queue Q) {
104104
sycl::local_accessor<int, 2> LAcc2D{sycl::range<2>{1, 2}, CGH};
105105
sycl::local_accessor<int, 3> LAcc3D{sycl::range<3>{1, 2, 3}, CGH};
106106
sycl::stream Stream{1024, 32, CGH};
107-
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
108-
sycl::image_target::host_task>
109-
UImgAcc1D{UImg1D, CGH};
110-
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
111-
sycl::image_target::host_task>
112-
UImgAcc2D{UImg2D, CGH};
113-
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
114-
sycl::image_target::host_task>
115-
UImgAcc3D{UImg3D, CGH};
116-
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
117-
SImgAcc1D{SImg1D, CGH};
118-
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
119-
SImgAcc2D{SImg2D, CGH};
120-
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
121-
SImgAcc3D{SImg3D, CGH};
122107

123108
CallableT<decltype(DAcc1D)>()(DAcc1D);
124109
CallableT<decltype(DAcc2D)>()(DAcc2D);
@@ -127,13 +112,33 @@ template <template <typename> typename CallableT> void runTest(sycl::queue Q) {
127112
CallableT<decltype(LAcc2D)>()(LAcc2D);
128113
CallableT<decltype(LAcc3D)>()(LAcc3D);
129114
CallableT<decltype(Stream)>()(Stream);
130-
CallableT<decltype(UImgAcc1D)>()(UImgAcc1D);
131-
CallableT<decltype(UImgAcc2D)>()(UImgAcc2D);
132-
CallableT<decltype(UImgAcc3D)>()(UImgAcc3D);
133-
CallableT<decltype(SImgAcc1D)>()(SImgAcc1D);
134-
CallableT<decltype(SImgAcc2D)>()(SImgAcc2D);
135-
CallableT<decltype(SImgAcc3D)>()(SImgAcc3D);
136115
});
116+
if (Q.get_device().has(sycl::aspect::ext_intel_legacy_image)) {
117+
Q.submit([&](sycl::handler &CGH) {
118+
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
119+
sycl::image_target::host_task>
120+
UImgAcc1D{UImg1D, CGH};
121+
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
122+
sycl::image_target::host_task>
123+
UImgAcc2D{UImg2D, CGH};
124+
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
125+
sycl::image_target::host_task>
126+
UImgAcc3D{UImg3D, CGH};
127+
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
128+
SImgAcc1D{SImg1D, CGH};
129+
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
130+
SImgAcc2D{SImg2D, CGH};
131+
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
132+
SImgAcc3D{SImg3D, CGH};
133+
134+
CallableT<decltype(UImgAcc1D)>()(UImgAcc1D);
135+
CallableT<decltype(UImgAcc2D)>()(UImgAcc2D);
136+
CallableT<decltype(UImgAcc3D)>()(UImgAcc3D);
137+
CallableT<decltype(SImgAcc1D)>()(SImgAcc1D);
138+
CallableT<decltype(SImgAcc2D)>()(SImgAcc2D);
139+
CallableT<decltype(SImgAcc3D)>()(SImgAcc3D);
140+
});
141+
}
137142
}
138143

139144
template <template <typename> typename CallableT>
@@ -266,49 +271,54 @@ void runTestMulti(sycl::queue Q1) {
266271
sycl::local_accessor<int, 3> LAcc3D2{sycl::range<3>{1, 2, 3}, CGH};
267272
sycl::stream Stream1{1024, 32, CGH};
268273
sycl::stream Stream2{1024, 32, CGH};
269-
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
270-
sycl::image_target::host_task>
271-
UImgAcc1D1{UImg1D1, CGH};
272-
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
273-
sycl::image_target::host_task>
274-
UImgAcc2D1{UImg2D1, CGH};
275-
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
276-
sycl::image_target::host_task>
277-
UImgAcc3D1{UImg3D1, CGH};
278-
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
279-
sycl::image_target::host_task>
280-
UImgAcc1D2{UImg1D2, CGH};
281-
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
282-
sycl::image_target::host_task>
283-
UImgAcc2D2{UImg2D2, CGH};
284-
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
285-
sycl::image_target::host_task>
286-
UImgAcc3D2{UImg3D2, CGH};
287-
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
288-
SImgAcc1D1{SImg1D1, CGH};
289-
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
290-
SImgAcc2D1{SImg2D1, CGH};
291-
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
292-
SImgAcc3D1{SImg3D1, CGH};
293-
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
294-
SImgAcc1D2{SImg1D2, CGH};
295-
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
296-
SImgAcc2D2{SImg2D2, CGH};
297-
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
298-
SImgAcc3D2{SImg3D2, CGH};
299-
300274
CallableT<decltype(DAcc1D1)>()(DAcc1D1, DAcc1D2);
301275
CallableT<decltype(DAcc2D1)>()(DAcc2D1, DAcc2D2);
302276
CallableT<decltype(DAcc3D1)>()(DAcc3D1, DAcc3D2);
303277
CallableT<decltype(LAcc1D1)>()(LAcc1D1, LAcc1D2);
304278
CallableT<decltype(LAcc2D1)>()(LAcc2D1, LAcc2D2);
305279
CallableT<decltype(LAcc3D1)>()(LAcc3D1, LAcc3D2);
306280
CallableT<decltype(Stream1)>()(Stream1, Stream2);
307-
CallableT<decltype(UImgAcc1D1)>()(UImgAcc1D1, UImgAcc1D2);
308-
CallableT<decltype(UImgAcc2D1)>()(UImgAcc2D1, UImgAcc2D2);
309-
CallableT<decltype(UImgAcc3D1)>()(UImgAcc3D1, UImgAcc3D2);
310-
CallableT<decltype(SImgAcc1D1)>()(SImgAcc1D1, SImgAcc1D2);
311-
CallableT<decltype(SImgAcc2D1)>()(SImgAcc2D1, SImgAcc2D2);
312-
CallableT<decltype(SImgAcc3D1)>()(SImgAcc3D1, SImgAcc3D2);
313281
});
282+
283+
if (Q1.get_device().has(sycl::aspect::ext_intel_legacy_image)) {
284+
Q1.submit([&](sycl::handler &CGH) {
285+
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
286+
sycl::image_target::host_task>
287+
UImgAcc1D1{UImg1D1, CGH};
288+
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
289+
sycl::image_target::host_task>
290+
UImgAcc2D1{UImg2D1, CGH};
291+
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
292+
sycl::image_target::host_task>
293+
UImgAcc3D1{UImg3D1, CGH};
294+
sycl::unsampled_image_accessor<sycl::int4, 1, sycl::access_mode::read,
295+
sycl::image_target::host_task>
296+
UImgAcc1D2{UImg1D2, CGH};
297+
sycl::unsampled_image_accessor<sycl::int4, 2, sycl::access_mode::read,
298+
sycl::image_target::host_task>
299+
UImgAcc2D2{UImg2D2, CGH};
300+
sycl::unsampled_image_accessor<sycl::int4, 3, sycl::access_mode::read,
301+
sycl::image_target::host_task>
302+
UImgAcc3D2{UImg3D2, CGH};
303+
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
304+
SImgAcc1D1{SImg1D1, CGH};
305+
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
306+
SImgAcc2D1{SImg2D1, CGH};
307+
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
308+
SImgAcc3D1{SImg3D1, CGH};
309+
sycl::sampled_image_accessor<sycl::int4, 1, sycl::image_target::host_task>
310+
SImgAcc1D2{SImg1D2, CGH};
311+
sycl::sampled_image_accessor<sycl::int4, 2, sycl::image_target::host_task>
312+
SImgAcc2D2{SImg2D2, CGH};
313+
sycl::sampled_image_accessor<sycl::int4, 3, sycl::image_target::host_task>
314+
SImgAcc3D2{SImg3D2, CGH};
315+
316+
CallableT<decltype(UImgAcc1D1)>()(UImgAcc1D1, UImgAcc1D2);
317+
CallableT<decltype(UImgAcc2D1)>()(UImgAcc2D1, UImgAcc2D2);
318+
CallableT<decltype(UImgAcc3D1)>()(UImgAcc3D1, UImgAcc3D2);
319+
CallableT<decltype(SImgAcc1D1)>()(SImgAcc1D1, SImgAcc1D2);
320+
CallableT<decltype(SImgAcc2D1)>()(SImgAcc2D1, SImgAcc2D2);
321+
CallableT<decltype(SImgAcc3D1)>()(SImgAcc3D1, SImgAcc3D2);
322+
});
323+
}
314324
}

sycl/test-e2e/XPTI/image/accessors.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// REQUIRES: xptifw, opencl
2+
// REQUIRES: aspect-ext_intel_legacy_image
23
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
34
// RUN: %{build} -o %t.out
45
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %{run} %t.out | FileCheck %s

0 commit comments

Comments
 (0)