26
26
namespace sycl {
27
27
inline namespace _V1 {
28
28
namespace detail {
29
-
30
29
std::atomic<unsigned long long > queue_impl::MNextAvailableQueueID = 0 ;
31
30
31
+ static std::vector<sycl::detail::pi::PiEvent>
32
+ getPIEvents (const std::vector<sycl::event> &DepEvents) {
33
+ std::vector<sycl::detail::pi::PiEvent> RetPiEvents;
34
+ for (const sycl::event &Event : DepEvents) {
35
+ const EventImplPtr &EventImpl = detail::getSyclObjImpl (Event);
36
+ if (EventImpl->getHandleRef () != nullptr )
37
+ RetPiEvents.push_back (EventImpl->getHandleRef ());
38
+ }
39
+ return RetPiEvents;
40
+ }
41
+
32
42
template <>
33
43
uint32_t queue_impl::get_info<info::queue::reference_count>() const {
34
44
sycl::detail::pi::PiResult result = PI_SUCCESS;
@@ -63,16 +73,25 @@ static event createDiscardedEvent() {
63
73
64
74
const std::vector<event> &
65
75
queue_impl::getExtendDependencyList (const std::vector<event> &DepEvents,
66
- std::vector<event> &MutableVec) {
67
- if (isInOrder ()) {
68
- std::optional<event> ExternalEvent = popExternalEvent ();
69
- if (ExternalEvent) {
70
- MutableVec = DepEvents;
71
- MutableVec.push_back (*ExternalEvent);
72
- return MutableVec;
73
- }
74
- }
75
- return DepEvents;
76
+ std::vector<event> &MutableVec,
77
+ std::unique_lock<std::mutex> &QueueLock) {
78
+ if (!isInOrder ())
79
+ return DepEvents;
80
+
81
+ QueueLock.lock ();
82
+ EventImplPtr ExtraEvent =
83
+ MGraph.expired () ? MLastEventPtr : MGraphLastEventPtr;
84
+ std::optional<event> ExternalEvent = popExternalEvent ();
85
+
86
+ if (!ExternalEvent && !ExtraEvent)
87
+ return DepEvents;
88
+
89
+ MutableVec = DepEvents;
90
+ if (ExternalEvent)
91
+ MutableVec.push_back (*ExternalEvent);
92
+ if (ExtraEvent)
93
+ MutableVec.push_back (detail::createSyclObjFromImpl<event>(ExtraEvent));
94
+ return MutableVec;
76
95
}
77
96
78
97
event queue_impl::memset (const std::shared_ptr<detail::queue_impl> &Self,
@@ -106,7 +125,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
106
125
}
107
126
108
127
return submitMemOpHelper (
109
- Self, DepEvents,
128
+ Self, DepEvents, [&](handler &CGH) { CGH. memset (Ptr, Value, Count); },
110
129
[](const auto &...Args ) { MemoryManager::fill_usm (Args...); }, Ptr, Self,
111
130
Count, Value);
112
131
}
@@ -152,21 +171,17 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
152
171
#endif
153
172
// If we have a command graph set we need to capture the copy through normal
154
173
// queue submission rather than execute the copy directly.
155
- if (MGraph.lock ()) {
156
- return submit (
157
- [&](handler &CGH) {
158
- CGH.depends_on (DepEvents);
159
- CGH.memcpy (Dest, Src, Count);
160
- },
161
- Self, {});
162
- }
174
+ auto HandlerFunc = [&](handler &CGH) { CGH.memcpy (Dest, Src, Count); };
175
+ if (MGraph.lock ())
176
+ return submitWithHandler (Self, DepEvents, HandlerFunc);
177
+
163
178
if ((!Src || !Dest) && Count != 0 ) {
164
179
report (CodeLoc);
165
180
throw runtime_error (" NULL pointer argument in memory copy operation." ,
166
181
PI_ERROR_INVALID_VALUE);
167
182
}
168
183
return submitMemOpHelper (
169
- Self, DepEvents,
184
+ Self, DepEvents, HandlerFunc,
170
185
[](const auto &...Args ) { MemoryManager::copy_usm (Args...); }, Src, Self,
171
186
Count, Dest);
172
187
}
@@ -177,17 +192,12 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
177
192
const std::vector<event> &DepEvents) {
178
193
// If we have a command graph set we need to capture the advise through normal
179
194
// queue submission.
180
- if (MGraph.lock ()) {
181
- return submit (
182
- [&](handler &CGH) {
183
- CGH.depends_on (DepEvents);
184
- CGH.mem_advise (Ptr, Length, Advice);
185
- },
186
- Self, {});
187
- }
195
+ auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise (Ptr, Length, Advice); };
196
+ if (MGraph.lock ())
197
+ return submitWithHandler (Self, DepEvents, HandlerFunc);
188
198
189
199
return submitMemOpHelper (
190
- Self, DepEvents,
200
+ Self, DepEvents, HandlerFunc,
191
201
[](const auto &...Args ) { MemoryManager::advise_usm (Args...); }, Ptr,
192
202
Self, Length, Advice);
193
203
}
@@ -198,6 +208,10 @@ event queue_impl::memcpyToDeviceGlobal(
198
208
const std::vector<event> &DepEvents) {
199
209
return submitMemOpHelper (
200
210
Self, DepEvents,
211
+ [&](handler &CGH) {
212
+ CGH.memcpyToDeviceGlobal (DeviceGlobalPtr, Src, IsDeviceImageScope,
213
+ NumBytes, Offset);
214
+ },
201
215
[](const auto &...Args ) {
202
216
MemoryManager::copy_to_device_global (Args...);
203
217
},
@@ -210,15 +224,25 @@ event queue_impl::memcpyFromDeviceGlobal(
210
224
size_t Offset, const std::vector<event> &DepEvents) {
211
225
return submitMemOpHelper (
212
226
Self, DepEvents,
227
+ [&](handler &CGH) {
228
+ CGH.memcpyFromDeviceGlobal (Dest, DeviceGlobalPtr, IsDeviceImageScope,
229
+ NumBytes, Offset);
230
+ },
213
231
[](const auto &...Args ) {
214
232
MemoryManager::copy_from_device_global (Args...);
215
233
},
216
234
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
217
235
}
218
236
219
- event queue_impl::getLastEvent () const {
220
- std::lock_guard<std::mutex> Lock{MLastEventMtx};
221
- return MDiscardEvents ? createDiscardedEvent () : MLastEvent;
237
+ event queue_impl::getLastEvent () {
238
+ std::lock_guard<std::mutex> Lock{MMutex};
239
+ if (MDiscardEvents)
240
+ return createDiscardedEvent ();
241
+ if (!MGraph.expired () && MGraphLastEventPtr)
242
+ return detail::createSyclObjFromImpl<event>(MGraphLastEventPtr);
243
+ if (!MLastEventPtr)
244
+ MLastEventPtr = std::make_shared<event_impl>(std::nullopt);
245
+ return detail::createSyclObjFromImpl<event>(MLastEventPtr);
222
246
}
223
247
224
248
void queue_impl::addEvent (const event &Event) {
@@ -273,55 +297,90 @@ void queue_impl::addSharedEvent(const event &Event) {
273
297
MEventsShared.push_back (Event);
274
298
}
275
299
276
- template <typename MemOpFuncT, typename ... MemOpArgTs>
300
+ static bool
301
+ areEventsSafeForSchedulerBypass (const std::vector<sycl::event> &DepEvents,
302
+ ContextImplPtr Context) {
303
+ auto CheckEvent = [&Context](const sycl::event &Event) {
304
+ const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl (Event);
305
+ // Events that don't have an initialized context are throwaway events that
306
+ // don't represent actual dependencies. Calling getContextImpl() would set
307
+ // their context, which we wish to avoid as it is expensive.
308
+ if (!SyclEventImplPtr->isContextInitialized () &&
309
+ !SyclEventImplPtr->is_host ()) {
310
+ return true ;
311
+ }
312
+ if (SyclEventImplPtr->is_host ()) {
313
+ return SyclEventImplPtr->isCompleted ();
314
+ }
315
+ // Cross-context dependencies can't be passed to the backend directly.
316
+ if (SyclEventImplPtr->getContextImpl () != Context)
317
+ return false ;
318
+
319
+ // A nullptr here means that the commmand does not produce a PI event or it
320
+ // hasn't been enqueued yet.
321
+ return SyclEventImplPtr->getHandleRef () != nullptr ;
322
+ };
323
+
324
+ return std::all_of (DepEvents.begin (), DepEvents.end (),
325
+ [&Context, &CheckEvent](const sycl::event &Event) {
326
+ return CheckEvent (Event);
327
+ });
328
+ }
329
+
330
+ template <typename HandlerFuncT>
331
+ event queue_impl::submitWithHandler (const std::shared_ptr<queue_impl> &Self,
332
+ const std::vector<event> &DepEvents,
333
+ HandlerFuncT HandlerFunc) {
334
+ return submit (
335
+ [&](handler &CGH) {
336
+ CGH.depends_on (DepEvents);
337
+ HandlerFunc (CGH);
338
+ },
339
+ Self, {});
340
+ }
341
+
342
+ template <typename HandlerFuncT, typename MemOpFuncT, typename ... MemOpArgTs>
277
343
event queue_impl::submitMemOpHelper (const std::shared_ptr<queue_impl> &Self,
278
344
const std::vector<event> &DepEvents,
345
+ HandlerFuncT HandlerFunc,
279
346
MemOpFuncT MemOpFunc,
280
347
MemOpArgTs... MemOpArgs) {
281
- if (MHasDiscardEventsSupport) {
282
- MemOpFunc (MemOpArgs..., getOrWaitEvents (DepEvents, MContext),
283
- /* PiEvent*/ nullptr , /* EventImplPtr*/ nullptr );
284
- return createDiscardedEvent ();
285
- }
286
-
287
- event ResEvent = prepareSYCLEventAssociatedWithQueue (Self);
348
+ // We need to submit command and update the last event under same lock if we
349
+ // have in-order queue.
288
350
{
289
- // We need to submit command and update the last event under same lock if we
290
- // have in-order queue.
291
- auto ScopeLock = isInOrder () ? std::unique_lock<std::mutex>(MLastEventMtx)
292
- : std::unique_lock<std::mutex>();
293
- // If the last submitted command in the in-order queue is host_task then
294
- // wait for it before submitting usm command.
295
- if (isInOrder () && MLastCGType == CG::CGTYPE::CodeplayHostTask)
296
- MLastEvent.wait ();
351
+ std::unique_lock<std::mutex> Lock (MMutex, std::defer_lock);
297
352
298
353
std::vector<event> MutableDepEvents;
299
354
const std::vector<event> &ExpandedDepEvents =
300
- getExtendDependencyList (DepEvents, MutableDepEvents);
301
-
302
- auto EventImpl = detail::getSyclObjImpl (ResEvent);
303
- MemOpFunc (MemOpArgs..., getOrWaitEvents (ExpandedDepEvents, MContext),
304
- &EventImpl->getHandleRef (), EventImpl);
305
-
306
- if (MContext->is_host ())
307
- return MDiscardEvents ? createDiscardedEvent () : event ();
308
-
309
- // When a queue is recorded by a graph, the dependencies are managed in the
310
- // graph implementaton. Additionally, CG recorded for a graph are outside of
311
- // the in-order queue execution sequence. Therefore, these CG must not
312
- // update MLastEvent.
313
- if (isInOrder () && (getCommandGraph () == nullptr )) {
314
- MLastEvent = ResEvent;
315
- // We don't create a command group for usm commands, so set it to None.
316
- // This variable is used to perform explicit dependency management when
317
- // required.
318
- MLastCGType = CG::CGTYPE::None;
355
+ getExtendDependencyList (DepEvents, MutableDepEvents, Lock);
356
+
357
+ if (areEventsSafeForSchedulerBypass (ExpandedDepEvents, MContext)) {
358
+ if (MHasDiscardEventsSupport) {
359
+ MemOpFunc (MemOpArgs..., getPIEvents (ExpandedDepEvents),
360
+ /* PiEvent*/ nullptr , /* EventImplPtr*/ nullptr );
361
+ return createDiscardedEvent ();
362
+ }
363
+
364
+ event ResEvent = prepareSYCLEventAssociatedWithQueue (Self);
365
+ auto EventImpl = detail::getSyclObjImpl (ResEvent);
366
+ MemOpFunc (MemOpArgs..., getPIEvents (ExpandedDepEvents),
367
+ &EventImpl->getHandleRef (), EventImpl);
368
+
369
+ if (MContext->is_host ())
370
+ return MDiscardEvents ? createDiscardedEvent () : event ();
371
+
372
+ if (isInOrder ()) {
373
+ auto &EventToStoreIn =
374
+ MGraph.lock () ? MGraphLastEventPtr : MLastEventPtr;
375
+ EventToStoreIn = EventImpl;
376
+ }
377
+ // Track only if we won't be able to handle it with piQueueFinish.
378
+ if (MEmulateOOO)
379
+ addSharedEvent (ResEvent);
380
+ return discard_or_return (ResEvent);
319
381
}
320
382
}
321
- // Track only if we won't be able to handle it with piQueueFinish.
322
- if (MEmulateOOO)
323
- addSharedEvent (ResEvent);
324
- return MDiscardEvents ? createDiscardedEvent () : ResEvent;
383
+ return submitWithHandler (Self, DepEvents, HandlerFunc);
325
384
}
326
385
327
386
void *queue_impl::instrumentationProlog (const detail::code_location &CodeLoc,
@@ -495,9 +554,10 @@ bool queue_impl::ext_oneapi_empty() const {
495
554
// If we have in-order queue where events are not discarded then just check
496
555
// the status of the last event.
497
556
if (isInOrder () && !MDiscardEvents) {
498
- std::lock_guard<std::mutex> Lock (MLastEventMtx);
499
- return MLastEvent.get_info <info::event::command_execution_status>() ==
500
- info::event_command_status::complete;
557
+ std::lock_guard<std::mutex> Lock (MMutex);
558
+ return !MLastEventPtr ||
559
+ MLastEventPtr->get_info <info::event::command_execution_status>() ==
560
+ info::event_command_status::complete;
501
561
}
502
562
503
563
// Check the status of the backend queue if this is not a host queue.
@@ -533,6 +593,12 @@ bool queue_impl::ext_oneapi_empty() const {
533
593
return true ;
534
594
}
535
595
596
+ event queue_impl::discard_or_return (const event &Event) {
597
+ if (!(MDiscardEvents))
598
+ return Event;
599
+ return createDiscardedEvent ();
600
+ }
601
+
536
602
} // namespace detail
537
603
} // namespace _V1
538
604
} // namespace sycl
0 commit comments