Skip to content

Commit 45e49cf

Browse files
[NFCI][SYCL] Refactor duplicated queue shortcut code (#12474)
1 parent d3c8a7e commit 45e49cf

File tree

2 files changed

+90
-229
lines changed

2 files changed

+90
-229
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 75 additions & 229 deletions
Original file line numberDiff line numberDiff line change
@@ -105,51 +105,10 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
105105
"for use with the SYCL Graph extension.");
106106
}
107107

108-
if (MHasDiscardEventsSupport) {
109-
MemoryManager::fill_usm(Ptr, Self, Count, Value,
110-
getOrWaitEvents(DepEvents, MContext), nullptr);
111-
return createDiscardedEvent();
112-
}
113-
114-
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
115-
{
116-
// We need to submit command and update the last event under same lock if we
117-
// have in-order queue.
118-
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
119-
: std::unique_lock<std::mutex>();
120-
// If the last submitted command in the in-order queue is host_task then
121-
// wait for it before submitting usm command.
122-
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
123-
MLastEvent.wait();
124-
125-
std::vector<event> MutableDepEvents;
126-
const std::vector<event> &ExpandedDepEvents =
127-
getExtendDependencyList(DepEvents, MutableDepEvents);
128-
129-
auto EventImpl = detail::getSyclObjImpl(ResEvent);
130-
MemoryManager::fill_usm(Ptr, Self, Count, Value,
131-
getOrWaitEvents(ExpandedDepEvents, MContext),
132-
&EventImpl->getHandleRef(), EventImpl);
133-
134-
if (MContext->is_host())
135-
return MDiscardEvents ? createDiscardedEvent() : event();
136-
137-
// When a queue is recorded by a graph, the dependencies are managed in the
138-
// graph implementaton. Additionally, CG recorded for a graph are outside of
139-
// the in-order queue execution sequence. Therefore, these CG must not
140-
// update MLastEvent.
141-
if (isInOrder() && (getCommandGraph() == nullptr)) {
142-
MLastEvent = ResEvent;
143-
// We don't create a command group for usm commands, so set it to None.
144-
// This variable is used to perform explicit dependency management when
145-
// required.
146-
MLastCGType = CG::CGTYPE::None;
147-
}
148-
}
149-
// Track only if we won't be able to handle it with piQueueFinish.
150-
if (MEmulateOOO)
151-
addSharedEvent(ResEvent);
152-
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
108+
return submitMemOpHelper(
109+
Self, DepEvents,
110+
[](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self,
111+
Count, Value);
153112
}
154113

155114
void report(const code_location &CodeLoc) {
@@ -206,51 +165,10 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
206165
throw runtime_error("NULL pointer argument in memory copy operation.",
207166
PI_ERROR_INVALID_VALUE);
208167
}
209-
if (MHasDiscardEventsSupport) {
210-
MemoryManager::copy_usm(Src, Self, Count, Dest,
211-
getOrWaitEvents(DepEvents, MContext), nullptr);
212-
return createDiscardedEvent();
213-
}
214-
215-
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
216-
{
217-
// We need to submit command and update the last event under same lock if we
218-
// have in-order queue.
219-
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
220-
: std::unique_lock<std::mutex>();
221-
// If the last submitted command in the in-order queue is host_task then
222-
// wait for it before submitting usm command.
223-
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
224-
MLastEvent.wait();
225-
226-
std::vector<event> MutableDepEvents;
227-
const std::vector<event> &ExpandedDepEvents =
228-
getExtendDependencyList(DepEvents, MutableDepEvents);
229-
230-
auto EventImpl = detail::getSyclObjImpl(ResEvent);
231-
MemoryManager::copy_usm(Src, Self, Count, Dest,
232-
getOrWaitEvents(ExpandedDepEvents, MContext),
233-
&EventImpl->getHandleRef(), EventImpl);
234-
235-
if (MContext->is_host())
236-
return MDiscardEvents ? createDiscardedEvent() : event();
237-
238-
// When a queue is recorded by a graph, the dependencies are managed in the
239-
// graph implementaton. Additionally, CG recorded for a graph are outside of
240-
// the in-order queue execution sequence. Therefore, these CG must not
241-
// update MLastEvent.
242-
if (isInOrder() && (getCommandGraph() == nullptr)) {
243-
MLastEvent = ResEvent;
244-
// We don't create a command group for usm commands, so set it to None.
245-
// This variable is used to perform explicit dependency management when
246-
// required.
247-
MLastCGType = CG::CGTYPE::None;
248-
}
249-
}
250-
// Track only if we won't be able to handle it with piQueueFinish.
251-
if (MEmulateOOO)
252-
addSharedEvent(ResEvent);
253-
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
168+
return submitMemOpHelper(
169+
Self, DepEvents,
170+
[](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
171+
Count, Dest);
254172
}
255173

256174
event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
@@ -268,157 +186,34 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
268186
Self, {});
269187
}
270188

271-
if (MHasDiscardEventsSupport) {
272-
MemoryManager::advise_usm(Ptr, Self, Length, Advice,
273-
getOrWaitEvents(DepEvents, MContext), nullptr);
274-
return createDiscardedEvent();
275-
}
276-
277-
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
278-
{
279-
// We need to submit command and update the last event under same lock if we
280-
// have in-order queue.
281-
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
282-
: std::unique_lock<std::mutex>();
283-
// If the last submitted command in the in-order queue is host_task then
284-
// wait for it before submitting usm command.
285-
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
286-
MLastEvent.wait();
287-
288-
std::vector<event> MutableDepEvents;
289-
const std::vector<event> &ExpandedDepEvents =
290-
getExtendDependencyList(DepEvents, MutableDepEvents);
291-
292-
auto EventImpl = detail::getSyclObjImpl(ResEvent);
293-
MemoryManager::advise_usm(Ptr, Self, Length, Advice,
294-
getOrWaitEvents(ExpandedDepEvents, MContext),
295-
&EventImpl->getHandleRef(), EventImpl);
296-
297-
if (MContext->is_host())
298-
return MDiscardEvents ? createDiscardedEvent() : event();
299-
300-
// When a queue is recorded by a graph, the dependencies are managed in the
301-
// graph implementaton. Additionally, CG recorded for a graph are outside of
302-
// the in-order queue execution sequence. Therefore, these CG must not
303-
// update MLastEvent.
304-
if (isInOrder() && (getCommandGraph() == nullptr)) {
305-
MLastEvent = ResEvent;
306-
// We don't create a command group for usm commands, so set it to None.
307-
// This variable is used to perform explicit dependency management when
308-
// required.
309-
MLastCGType = CG::CGTYPE::None;
310-
}
311-
}
312-
// Track only if we won't be able to handle it with piQueueFinish.
313-
if (MEmulateOOO)
314-
addSharedEvent(ResEvent);
315-
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
189+
return submitMemOpHelper(
190+
Self, DepEvents,
191+
[](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
192+
Self, Length, Advice);
316193
}
317194

318195
event queue_impl::memcpyToDeviceGlobal(
319196
const std::shared_ptr<detail::queue_impl> &Self, void *DeviceGlobalPtr,
320197
const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset,
321198
const std::vector<event> &DepEvents) {
322-
if (MHasDiscardEventsSupport) {
323-
MemoryManager::copy_to_device_global(
324-
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
325-
getOrWaitEvents(DepEvents, MContext), nullptr);
326-
return createDiscardedEvent();
327-
}
328-
329-
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
330-
{
331-
// We need to submit command and update the last event under same lock if we
332-
// have in-order queue.
333-
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
334-
: std::unique_lock<std::mutex>();
335-
// If the last submitted command in the in-order queue is host_task then
336-
// wait for it before submitting usm command.
337-
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
338-
MLastEvent.wait();
339-
340-
std::vector<event> MutableDepEvents;
341-
const std::vector<event> &ExpandedDepEvents =
342-
getExtendDependencyList(DepEvents, MutableDepEvents);
343-
344-
auto EventImpl = detail::getSyclObjImpl(ResEvent);
345-
MemoryManager::copy_to_device_global(
346-
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
347-
getOrWaitEvents(ExpandedDepEvents, MContext),
348-
&EventImpl->getHandleRef(), EventImpl);
349-
350-
if (MContext->is_host())
351-
return MDiscardEvents ? createDiscardedEvent() : event();
352-
353-
// When a queue is recorded by a graph, the dependencies are managed in the
354-
// graph implementaton. Additionally, CG recorded for a graph are outside of
355-
// the in-order queue execution sequence. Therefore, these CG must not
356-
// update MLastEvent.
357-
if (isInOrder() && (getCommandGraph() == nullptr)) {
358-
MLastEvent = ResEvent;
359-
// We don't create a command group for usm commands, so set it to None.
360-
// This variable is used to perform explicit dependency management when
361-
// required.
362-
MLastCGType = CG::CGTYPE::None;
363-
}
364-
}
365-
// Track only if we won't be able to handle it with piQueueFinish.
366-
if (MEmulateOOO)
367-
addSharedEvent(ResEvent);
368-
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
199+
return submitMemOpHelper(
200+
Self, DepEvents,
201+
[](const auto &...Args) {
202+
MemoryManager::copy_to_device_global(Args...);
203+
},
204+
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src);
369205
}
370206

371207
event queue_impl::memcpyFromDeviceGlobal(
372208
const std::shared_ptr<detail::queue_impl> &Self, void *Dest,
373209
const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes,
374210
size_t Offset, const std::vector<event> &DepEvents) {
375-
if (MHasDiscardEventsSupport) {
376-
MemoryManager::copy_from_device_global(
377-
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
378-
getOrWaitEvents(DepEvents, MContext), nullptr);
379-
return createDiscardedEvent();
380-
}
381-
382-
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
383-
{
384-
// We need to submit command and update the last event under same lock if we
385-
// have in-order queue.
386-
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
387-
: std::unique_lock<std::mutex>();
388-
// If the last submitted command in the in-order queue is host_task then
389-
// wait for it before submitting usm command.
390-
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
391-
MLastEvent.wait();
392-
393-
std::vector<event> MutableDepEvents;
394-
const std::vector<event> &ExpandedDepEvents =
395-
getExtendDependencyList(DepEvents, MutableDepEvents);
396-
397-
auto EventImpl = detail::getSyclObjImpl(ResEvent);
398-
MemoryManager::copy_from_device_global(
399-
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
400-
getOrWaitEvents(ExpandedDepEvents, MContext),
401-
&EventImpl->getHandleRef(), EventImpl);
402-
403-
if (MContext->is_host())
404-
return MDiscardEvents ? createDiscardedEvent() : event();
405-
406-
// When a queue is recorded by a graph, the dependencies are managed in the
407-
// graph implementaton. Additionally, CG recorded for a graph are outside of
408-
// the in-order queue execution sequence. Therefore, these CG must not
409-
// update MLastEvent.
410-
if (isInOrder() && (getCommandGraph() == nullptr)) {
411-
MLastEvent = ResEvent;
412-
// We don't create a command group for usm commands, so set it to None.
413-
// This variable is used to perform explicit dependency management when
414-
// required.
415-
MLastCGType = CG::CGTYPE::None;
416-
}
417-
}
418-
// Track only if we won't be able to handle it with piQueueFinish.
419-
if (MEmulateOOO)
420-
addSharedEvent(ResEvent);
421-
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
211+
return submitMemOpHelper(
212+
Self, DepEvents,
213+
[](const auto &...Args) {
214+
MemoryManager::copy_from_device_global(Args...);
215+
},
216+
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
422217
}
423218

424219
event queue_impl::getLastEvent() const {
@@ -478,6 +273,57 @@ void queue_impl::addSharedEvent(const event &Event) {
478273
MEventsShared.push_back(Event);
479274
}
480275

276+
template <typename MemOpFuncT, typename... MemOpArgTs>
277+
event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
278+
const std::vector<event> &DepEvents,
279+
MemOpFuncT MemOpFunc,
280+
MemOpArgTs... MemOpArgs) {
281+
if (MHasDiscardEventsSupport) {
282+
MemOpFunc(MemOpArgs..., getOrWaitEvents(DepEvents, MContext),
283+
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
284+
return createDiscardedEvent();
285+
}
286+
287+
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
288+
{
289+
// We need to submit command and update the last event under same lock if we
290+
// have in-order queue.
291+
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
292+
: std::unique_lock<std::mutex>();
293+
// If the last submitted command in the in-order queue is host_task then
294+
// wait for it before submitting usm command.
295+
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
296+
MLastEvent.wait();
297+
298+
std::vector<event> MutableDepEvents;
299+
const std::vector<event> &ExpandedDepEvents =
300+
getExtendDependencyList(DepEvents, MutableDepEvents);
301+
302+
auto EventImpl = detail::getSyclObjImpl(ResEvent);
303+
MemOpFunc(MemOpArgs..., getOrWaitEvents(ExpandedDepEvents, MContext),
304+
&EventImpl->getHandleRef(), EventImpl);
305+
306+
if (MContext->is_host())
307+
return MDiscardEvents ? createDiscardedEvent() : event();
308+
309+
// When a queue is recorded by a graph, the dependencies are managed in the
310+
// graph implementation. Additionally, CGs recorded for a graph are outside of
311+
// the in-order queue execution sequence. Therefore, these CG must not
312+
// update MLastEvent.
313+
if (isInOrder() && (getCommandGraph() == nullptr)) {
314+
MLastEvent = ResEvent;
315+
// We don't create a command group for usm commands, so set it to None.
316+
// This variable is used to perform explicit dependency management when
317+
// required.
318+
MLastCGType = CG::CGTYPE::None;
319+
}
320+
}
321+
// Track only if we won't be able to handle it with piQueueFinish.
322+
if (MEmulateOOO)
323+
addSharedEvent(ResEvent);
324+
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
325+
}
326+
481327
void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
482328
std::string &Name, int32_t StreamID,
483329
uint64_t &IId) {

sycl/source/detail/queue_impl.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,21 @@ class queue_impl {
844844
return Event;
845845
}
846846

847+
/// Performs direct submission of a memory operation.
848+
///
849+
/// \param Self is a shared_ptr to this queue.
850+
/// \param DepEvents is a vector of dependencies of the operation.
851+
/// \param MemOpFunc is a function that forwards its arguments to the
852+
/// appropriate memory manager function.
853+
/// \param MemOpArgs are all the arguments that need to be passed to the memory
854+
/// manager function except the last three (dependencies, PI event and
855+
/// EventImplPtr), which are filled out by this helper.
856+
/// \return an event representing the submitted operation.
857+
template <typename MemOpFuncT, typename... MemOpArgTs>
858+
event submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
859+
const std::vector<event> &DepEvents,
860+
MemOpFuncT MemOpFunc, MemOpArgTs... MemOpArgs);
861+
847862
// When instrumentation is enabled emits trace event for wait begin and
848863
// returns the telemetry event generated for the wait
849864
void *instrumentationProlog(const detail::code_location &CodeLoc,

0 commit comments

Comments
 (0)