Skip to content

Commit a1c1e04

Browse files
[SYCL] Fix error handling in non-blocking pipe operations (#13166)
When a non-blocking pipe operation fails, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST is expected. The runtime needs to handle that case instead of throwing the exception.
1 parent f2ac688 commit a1c1e04

File tree

10 files changed

+117
-28
lines changed

10 files changed

+117
-28
lines changed

sycl/include/sycl/ext/intel/experimental/pipes.hpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ namespace ext {
3939
namespace intel {
4040
namespace experimental {
4141

42-
// A helper templateless base class to get the host_pipe name.
42+
// A helper templateless base class.
4343
class pipe_base {
4444

4545
protected:
4646
pipe_base();
4747
~pipe_base();
4848

4949
__SYCL_EXPORT static std::string get_pipe_name(const void *HostPipePtr);
50+
__SYCL_EXPORT static bool wait_non_blocking(const event &E);
5051
};
5152

5253
template <class _name, class _dataT, int32_t _min_capacity = 0,
@@ -95,15 +96,13 @@ class pipe : public pipe_base {
9596
CGH.ext_intel_read_host_pipe(PipeName, DataPtr,
9697
sizeof(_dataT) /* non-blocking */);
9798
});
98-
E.wait();
99-
if (E.get_info<sycl::info::event::command_execution_status>() ==
100-
sycl::info::event_command_status::complete) {
101-
Success = true;
102-
return *(_dataT *)DataPtr;
103-
} else {
104-
Success = false;
105-
return _dataT();
106-
}
99+
// In OpenCL 1.0 waiting for a failed event does not return an error, so we
100+
// need to check the execution status here as well.
101+
Success = wait_non_blocking(E) &&
102+
E.get_info<sycl::info::event::command_execution_status>() ==
103+
sycl::info::event_command_status::complete;
104+
;
105+
return Success ? *(_dataT *)DataPtr : _dataT();
107106
}
108107

109108
static void write(queue &Q, const _dataT &Data, bool &Success,
@@ -126,9 +125,11 @@ class pipe : public pipe_base {
126125
CGH.ext_intel_write_host_pipe(PipeName, DataPtr,
127126
sizeof(_dataT) /* non-blocking */);
128127
});
129-
E.wait();
130-
Success = E.get_info<sycl::info::event::command_execution_status>() ==
131-
sycl::info::event_command_status::complete;
128+
// In OpenCL 1.0 waiting for a failed event does not return an error, so we
129+
// need to check the execution status here as well.
130+
Success = wait_non_blocking(E) &&
131+
E.get_info<sycl::info::event::command_execution_status>() ==
132+
sycl::info::event_command_status::complete;
132133
}
133134

134135
// Reading from pipe is lowered to SPIR-V instruction OpReadPipe via SPIR-V

sycl/source/detail/event_impl.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,23 @@ event_impl::~event_impl() {
5858
getPlugin()->call<PiApiKind::piEventRelease>(MEvent);
5959
}
6060

61-
void event_impl::waitInternal() {
61+
void event_impl::waitInternal(bool *Success) {
6262
if (!MHostEvent && MEvent) {
6363
// Wait for the native event
64-
getPlugin()->call<PiApiKind::piEventsWait>(1, &MEvent);
64+
sycl::detail::pi::PiResult Err =
65+
getPlugin()->call_nocheck<PiApiKind::piEventsWait>(1, &MEvent);
66+
// TODO drop the PI_ERROR_UKNOWN from here once the UR counterpart to
67+
// PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST is added:
68+
// https://github.com/oneapi-src/unified-runtime/issues/1459
69+
if (Success != nullptr &&
70+
(Err == PI_ERROR_UNKNOWN ||
71+
Err == PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST))
72+
*Success = false;
73+
else {
74+
getPlugin()->checkPiResult(Err);
75+
if (Success != nullptr)
76+
*Success = true;
77+
}
6578
} else if (MState == HES_Discarded) {
6679
// Waiting for the discarded event is invalid
6780
throw sycl::exception(
@@ -229,7 +242,8 @@ void event_impl::instrumentationEpilog(void *TelemetryEvent,
229242
#endif
230243
}
231244

232-
void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
245+
void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self,
246+
bool *Success) {
233247
if (MState == HES_Discarded)
234248
throw sycl::exception(make_error_code(errc::invalid),
235249
"wait method cannot be used for a discarded event.");
@@ -251,9 +265,9 @@ void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
251265
if (MEvent)
252266
// presence of MEvent means the command has been enqueued, so no need to
253267
// go via the slow path event waiting in the scheduler
254-
waitInternal();
268+
waitInternal(Success);
255269
else if (MCommand)
256-
detail::Scheduler::getInstance().waitForEvent(Self);
270+
detail::Scheduler::getInstance().waitForEvent(Self, Success);
257271

258272
#ifdef XPTI_ENABLE_INSTRUMENTATION
259273
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);

sycl/source/detail/event_impl.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,12 @@ class event_impl {
7575
/// Self is needed in order to pass shared_ptr to Scheduler.
7676
///
7777
/// \param Self is a pointer to this event.
78-
void wait(std::shared_ptr<sycl::detail::event_impl> Self);
78+
/// \param Success is an optional parameter that, when set to a non-null
79+
/// pointer, indicates that failure is a valid outcome for this wait
80+
/// (e.g., in case of a non-blocking read from a pipe), and the value
81+
/// it's pointing to is then set according to the outcome.
82+
void wait(std::shared_ptr<sycl::detail::event_impl> Self,
83+
bool *Success = nullptr);
7984

8085
/// Waits for the event.
8186
///
@@ -108,7 +113,11 @@ class event_impl {
108113
~event_impl();
109114

110115
/// Waits for the event with respect to device type.
111-
void waitInternal();
116+
/// \param Success is an optional parameter that, when set to a non-null
117+
/// pointer, indicates that failure is a valid outcome for this wait
118+
/// (e.g., in case of a non-blocking read from a pipe), and the value
119+
/// it's pointing to is then set according to the outcome.
120+
void waitInternal(bool *Success = nullptr);
112121

113122
/// Marks this event as completed.
114123
void setComplete();

sycl/source/detail/pipes.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <detail/event_impl.hpp>
910
#include <detail/host_pipe_map_entry.hpp>
1011
#include <detail/program_manager/program_manager.hpp>
1112
#include <sycl/ext/intel/experimental/pipes.hpp>
@@ -22,6 +23,14 @@ __SYCL_EXPORT std::string pipe_base::get_pipe_name(const void *HostPipePtr) {
2223
->MUniqueId;
2324
}
2425

26+
__SYCL_EXPORT bool pipe_base::wait_non_blocking(const event &E) {
27+
bool Success = false;
28+
std::shared_ptr<sycl::detail::event_impl> EImpl =
29+
sycl::detail::getSyclObjImpl(E);
30+
EImpl->wait(EImpl, &Success);
31+
return Success;
32+
}
33+
2534
} // namespace experimental
2635
} // namespace intel
2736
} // namespace ext

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static Command *getCommand(const EventImplPtr &Event) {
2424
void Scheduler::GraphProcessor::waitForEvent(const EventImplPtr &Event,
2525
ReadLockT &GraphReadLock,
2626
std::vector<Command *> &ToCleanUp,
27-
bool LockTheLock) {
27+
bool LockTheLock, bool *Success) {
2828
Command *Cmd = getCommand(Event);
2929
// Command can be nullptr if user creates sycl::event explicitly or the
3030
// event has been waited on by another thread
@@ -41,7 +41,7 @@ void Scheduler::GraphProcessor::waitForEvent(const EventImplPtr &Event,
4141
assert(Cmd->getEvent() == Event);
4242

4343
GraphReadLock.unlock();
44-
Event->waitInternal();
44+
Event->waitInternal(Success);
4545

4646
if (LockTheLock)
4747
GraphReadLock.lock();

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,13 @@ bool Scheduler::isInstanceAlive() {
266266
return GlobalHandler::instance().isSchedulerAlive();
267267
}
268268

269-
void Scheduler::waitForEvent(const EventImplPtr &Event) {
269+
void Scheduler::waitForEvent(const EventImplPtr &Event, bool *Success) {
270270
ReadLockT Lock = acquireReadLock();
271271
// It's fine to leave the lock unlocked upon return from waitForEvent as
272272
// there's no more actions to do here with graph
273273
std::vector<Command *> ToCleanUp;
274274
GraphProcessor::waitForEvent(std::move(Event), Lock, ToCleanUp,
275-
/*LockTheLock=*/false);
275+
/*LockTheLock=*/false, Success);
276276
cleanupCommands(ToCleanUp);
277277
}
278278

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,12 @@ class Scheduler {
396396
/// corresponding function of device API.
397397
///
398398
/// \param Event is a pointer to event to wait on.
399-
void waitForEvent(const EventImplPtr &Event);
399+
/// \param Success is an optional parameter that, when set to a non-null
400+
/// pointer, indicates that failure is a valid outcome for this wait
401+
/// (e.g., in case of a non-blocking read from a pipe), and the value
402+
/// it's pointing to is then set according to the outcome.
403+
404+
void waitForEvent(const EventImplPtr &Event, bool *Success = nullptr);
400405

401406
/// Removes buffer from the graph.
402407
///
@@ -886,13 +891,17 @@ class Scheduler {
886891
/// \param GraphReadLock read-lock which is already acquired for reading
887892
/// \param ToCleanUp container for commands that can be cleaned up.
888893
/// \param LockTheLock selects if graph lock should be locked upon return
894+
/// \param Success is an optional parameter that, when set to a non-null
895+
/// pointer, indicates that failure is a valid outcome for this wait
896+
/// (e.g., in case of a non-blocking read from a pipe), and the value
897+
/// it's pointing to is then set according to the outcome.
889898
///
890899
/// The function may unlock and lock GraphReadLock as needed. Upon return
891900
/// the lock is left in locked state if and only if LockTheLock is true.
892901
static void waitForEvent(const EventImplPtr &Event,
893902
ReadLockT &GraphReadLock,
894903
std::vector<Command *> &ToCleanUp,
895-
bool LockTheLock = true);
904+
bool LockTheLock = true, bool *Success = nullptr);
896905

897906
/// Enqueues the command and all its dependencies.
898907
///

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3656,6 +3656,7 @@ _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE
36563656
_ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE0EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_
36573657
_ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_
36583658
_ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv
3659+
_ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE
36593660
_ZN4sycl3_V13ext6oneapi10level_zero10make_eventERKNS0_7contextEmb
36603661
_ZN4sycl3_V13ext6oneapi10level_zero10make_queueERKNS0_7contextERKNS0_6deviceEmbbRKNS0_13property_listE
36613662
_ZN4sycl3_V13ext6oneapi10level_zero11make_deviceERKNS0_8platformEm

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5162,6 +5162,7 @@
51625162
?wait_and_throw@event@_V1@sycl@@SAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z
51635163
?wait_and_throw@queue@_V1@sycl@@QEAAXAEBUcode_location@detail@23@@Z
51645164
?wait_and_throw_proxy@queue@_V1@sycl@@QEAAXAEBUcode_location@detail@23@@Z
5165+
?wait_non_blocking@pipe_base@experimental@intel@ext@_V1@sycl@@KA_NAEBVevent@56@@Z
51655166
?wait_proxy@queue@_V1@sycl@@QEAAXAEBUcode_location@detail@23@@Z
51665167
?what@exception@_V1@sycl@@UEBAPEBDXZ
51675168
DllMain

sycl/unittests/pipes/host_pipe_registration.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,14 @@ static int PipeWriteVal = 0;
8181
pi_result redefinedEnqueueReadHostPipe(pi_queue, pi_program, const char *,
8282
pi_bool, void *ptr, size_t, pi_uint32,
8383
const pi_event *, pi_event *event) {
84+
*event = createDummyHandle<pi_event>();
8485
*(((int *)ptr)) = PipeReadVal;
8586
return PI_SUCCESS;
8687
}
8788
pi_result redefinedEnqueueWriteHostPipe(pi_queue, pi_program, const char *,
8889
pi_bool, void *ptr, size_t, pi_uint32,
8990
const pi_event *, pi_event *event) {
91+
*event = createDummyHandle<pi_event>();
9092
PipeWriteVal = 9;
9193
return PI_SUCCESS;
9294
}
@@ -142,14 +144,15 @@ class PipeTest : public ::testing::Test {
142144
queue q;
143145
};
144146

147+
static sycl::unittest::PiImage Img = generateDefaultImage();
148+
static sycl::unittest::PiImageArray<1> ImgArray{&Img};
149+
145150
TEST_F(PipeTest, Basic) {
146151
// Fake extension
147152
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
148153
after_piDeviceGetInfo);
149154

150155
// Device registration
151-
static sycl::unittest::PiImage Img = generateDefaultImage();
152-
static sycl::unittest::PiImageArray<1> ImgArray{&Img};
153156

154157
// Testing read
155158
int HostPipeReadData;
@@ -161,3 +164,45 @@ TEST_F(PipeTest, Basic) {
161164
Pipe::write(q, HostPipeWriteData);
162165
EXPECT_EQ(PipeWriteVal, 9);
163166
}
167+
168+
bool EventsWaitFails = true;
169+
pi_result redefinedEventsWait(pi_uint32 num_events,
170+
const pi_event *event_list) {
171+
return EventsWaitFails ? PI_ERROR_UNKNOWN : PI_SUCCESS;
172+
}
173+
174+
pi_result after_piEventGetInfo(pi_event event, pi_event_info param_name,
175+
size_t param_value_size, void *param_value,
176+
size_t *param_value_size_ret) {
177+
if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) {
178+
if (param_value)
179+
*static_cast<pi_event_status *>(param_value) = pi_event_status(-1);
180+
if (param_value_size_ret)
181+
*param_value_size_ret = sizeof(pi_event_status);
182+
}
183+
return PI_SUCCESS;
184+
}
185+
186+
TEST_F(PipeTest, NonBlockingOperationFail) {
187+
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
188+
after_piDeviceGetInfo);
189+
Mock.redefine<sycl::detail::PiApiKind::piEventsWait>(redefinedEventsWait);
190+
191+
bool Success = false;
192+
Pipe::read(q, Success);
193+
ASSERT_FALSE(Success);
194+
195+
Pipe::write(q, 0, Success);
196+
ASSERT_FALSE(Success);
197+
198+
// Test the OpenCL 1.0 case: no error code after waiting.
199+
EventsWaitFails = false;
200+
Mock.redefineAfter<sycl::detail::PiApiKind::piEventGetInfo>(
201+
after_piEventGetInfo);
202+
203+
Pipe::read(q, Success);
204+
ASSERT_FALSE(Success);
205+
206+
Pipe::write(q, 0, Success);
207+
ASSERT_FALSE(Success);
208+
}

0 commit comments

Comments
 (0)