Skip to content

[SYCL] Use scheduler in queue shortcuts to avoid waiting for deps #11758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 52 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
ef0adff
draft beginning
KseniyaTikhomirova Oct 26, 2023
26322d0
Impl
KseniyaTikhomirova Nov 2, 2023
2bd4b1f
fix clang-format
KseniyaTikhomirova Nov 2, 2023
9dbeffa
last changes
KseniyaTikhomirova Nov 2, 2023
78b9801
update UT
KseniyaTikhomirova Nov 2, 2023
f6e0ca6
add UT
KseniyaTikhomirova Nov 2, 2023
a9afee7
Merge branch 'sycl' into inorder_enqueue_issues
KseniyaTikhomirova Nov 2, 2023
96652bc
add test for producesPiEVent
KseniyaTikhomirova Nov 2, 2023
b776832
improve
KseniyaTikhomirova Nov 6, 2023
5e97874
cleanup
KseniyaTikhomirova Nov 6, 2023
aee06b6
Merge branch 'sycl' into inorder_enqueue_issues
KseniyaTikhomirova Nov 9, 2023
ee25f0a
fix discard events usage
KseniyaTikhomirova Nov 10, 2023
5ada53c
fix comparison
KseniyaTikhomirova Nov 10, 2023
c002f7b
Merge branch 'sycl' into inorder_enqueue_issues
KseniyaTikhomirova Nov 21, 2023
cd184bb
support for graph
KseniyaTikhomirova Nov 22, 2023
654ada2
fix barrier submission
KseniyaTikhomirova Nov 22, 2023
cc0db2c
use common Mutex for last event usage
KseniyaTikhomirova Nov 22, 2023
072121d
fix test
KseniyaTikhomirova Nov 24, 2023
a235192
[SYCL] Remove WA for L0 for not immediate context usage
KseniyaTikhomirova Dec 6, 2023
f4c9b2e
Merge branch 'sycl' into inorder_enqueue_issues
KseniyaTikhomirova Dec 6, 2023
6560246
fix comments
KseniyaTikhomirova Dec 6, 2023
44eabe5
fix code-review comments
KseniyaTikhomirova Dec 6, 2023
bfcd9b6
fix tests
KseniyaTikhomirova Dec 6, 2023
1779330
Merge branch 'remove_L0_WA' into inorder_enqueue_issues
KseniyaTikhomirova Dec 6, 2023
93bee37
make producesPiEvent non virtual
KseniyaTikhomirova Dec 6, 2023
68b6b19
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 3, 2024
db06547
Revert "make producesPiEvent non virtual"
sergey-semenov Jan 4, 2024
d9bb40a
Revert "Merge branch 'remove_L0_WA' into inorder_enqueue_issues"
sergey-semenov Jan 4, 2024
8abee62
Check event handle instead of using producesPiEvent
sergey-semenov Jan 4, 2024
696861b
Revert unrelated changes
sergey-semenov Jan 4, 2024
ddf8b9f
Revert barrier changes
sergey-semenov Jan 4, 2024
9f2c79d
Apply comments
sergey-semenov Jan 4, 2024
3214f35
Create the last event if it doesn't exist
sergey-semenov Jan 8, 2024
0ba5561
Account for separate tracking of graph events in getLastEvent
sergey-semenov Jan 9, 2024
1abe98c
Appease clang-format
sergey-semenov Jan 9, 2024
5588795
Remove accidental edit
sergey-semenov Jan 9, 2024
a932351
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 9, 2024
d32f902
Reuse last non-graph event if a graph one doesn't exist
sergey-semenov Jan 9, 2024
985c1e8
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 17, 2024
3196bfc
Reuse getExtendDependencyList for the extra event
sergey-semenov Jan 17, 2024
4f3219d
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 17, 2024
0f79f76
Rename a function to account for other cases where scheduler can't be…
sergey-semenov Jan 17, 2024
2063434
Reduce code duplication for graph-related submissions
sergey-semenov Jan 17, 2024
7f21bb7
Adjust the failing test to new behavior
sergey-semenov Jan 18, 2024
5f5191d
Adjust the failing test
sergey-semenov Jan 18, 2024
e882b19
Refactor common submission code + apply other comments
sergey-semenov Jan 23, 2024
504f0f1
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 23, 2024
b52f9ed
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 24, 2024
b509f68
Apply clang-format & add/update comments
sergey-semenov Jan 24, 2024
e31dfe3
Trim unrelated edits
sergey-semenov Jan 24, 2024
c21ffa6
Merge branch 'sycl' into inorder_enqueue_issues
sergey-semenov Jan 25, 2024
15eb641
Fix comment typo
sergey-semenov Jan 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,11 +356,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
}
};

auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
return discard_or_return(Event);
return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
#else
auto Event = submit_impl(CGF, CodeLoc);
return discard_or_return(Event);
return submit_impl(CGF, CodeLoc);
#endif // __SYCL_USE_FALLBACK_ASSERT
}

Expand Down Expand Up @@ -395,12 +393,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
}
};

auto Event =
submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, PostProcess);
return discard_or_return(Event);
return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
PostProcess);
#else
auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc);
return discard_or_return(Event);
return submit_impl(CGF, SecondaryQueue, CodeLoc);
#endif // __SYCL_USE_FALLBACK_ASSERT
}

Expand Down Expand Up @@ -2814,6 +2810,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {

/// Checks if the event needs to be discarded and if so, discards it and
/// returns a discarded event. Otherwise, it returns input event.
/// TODO: move to impl class in the next ABI Breaking window
event discard_or_return(const event &Event);

// Function to postprocess submitted command
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace sycl {
inline namespace _V1 {
using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
namespace detail {
// TODO: remove from public header files and implementation during the next ABI
// Breaking window. Not used any more.
std::vector<sycl::detail::pi::PiEvent>
getOrWaitEvents(std::vector<sycl::event> DepEvents, ContextImplPtr Context) {
std::vector<sycl::detail::pi::PiEvent> Events;
Expand Down
216 changes: 141 additions & 75 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,19 @@
namespace sycl {
inline namespace _V1 {
namespace detail {

std::atomic<unsigned long long> queue_impl::MNextAvailableQueueID = 0;

static std::vector<sycl::detail::pi::PiEvent>
getPIEvents(const std::vector<sycl::event> &DepEvents) {
std::vector<sycl::detail::pi::PiEvent> RetPiEvents;
for (const sycl::event &Event : DepEvents) {
const EventImplPtr &EventImpl = detail::getSyclObjImpl(Event);
if (EventImpl->getHandleRef() != nullptr)
RetPiEvents.push_back(EventImpl->getHandleRef());
}
return RetPiEvents;
}

template <>
uint32_t queue_impl::get_info<info::queue::reference_count>() const {
sycl::detail::pi::PiResult result = PI_SUCCESS;
Expand Down Expand Up @@ -63,16 +73,25 @@ static event createDiscardedEvent() {

const std::vector<event> &
queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
std::vector<event> &MutableVec) {
if (isInOrder()) {
std::optional<event> ExternalEvent = popExternalEvent();
if (ExternalEvent) {
MutableVec = DepEvents;
MutableVec.push_back(*ExternalEvent);
return MutableVec;
}
}
return DepEvents;
std::vector<event> &MutableVec,
std::unique_lock<std::mutex> &QueueLock) {
if (!isInOrder())
return DepEvents;

QueueLock.lock();
EventImplPtr ExtraEvent =
MGraph.expired() ? MLastEventPtr : MGraphLastEventPtr;
std::optional<event> ExternalEvent = popExternalEvent();

if (!ExternalEvent && !ExtraEvent)
return DepEvents;

MutableVec = DepEvents;
if (ExternalEvent)
MutableVec.push_back(*ExternalEvent);
if (ExtraEvent)
MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
return MutableVec;
}

event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
Expand Down Expand Up @@ -106,7 +125,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
}

return submitMemOpHelper(
Self, DepEvents,
Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
[](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self,
Count, Value);
}
Expand Down Expand Up @@ -152,21 +171,17 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
#endif
// If we have a command graph set we need to capture the copy through normal
// queue submission rather than execute the copy directly.
if (MGraph.lock()) {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.memcpy(Dest, Src, Count);
},
Self, {});
}
auto HandlerFunc = [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

if ((!Src || !Dest) && Count != 0) {
report(CodeLoc);
throw runtime_error("NULL pointer argument in memory copy operation.",
PI_ERROR_INVALID_VALUE);
}
return submitMemOpHelper(
Self, DepEvents,
Self, DepEvents, HandlerFunc,
[](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
Count, Dest);
}
Expand All @@ -177,17 +192,12 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
const std::vector<event> &DepEvents) {
// If we have a command graph set we need to capture the advise through normal
// queue submission.
if (MGraph.lock()) {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.mem_advise(Ptr, Length, Advice);
},
Self, {});
}
auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

return submitMemOpHelper(
Self, DepEvents,
Self, DepEvents, HandlerFunc,
[](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
Self, Length, Advice);
}
Expand All @@ -198,6 +208,10 @@ event queue_impl::memcpyToDeviceGlobal(
const std::vector<event> &DepEvents) {
return submitMemOpHelper(
Self, DepEvents,
[&](handler &CGH) {
CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope,
NumBytes, Offset);
},
[](const auto &...Args) {
MemoryManager::copy_to_device_global(Args...);
},
Expand All @@ -210,15 +224,25 @@ event queue_impl::memcpyFromDeviceGlobal(
size_t Offset, const std::vector<event> &DepEvents) {
return submitMemOpHelper(
Self, DepEvents,
[&](handler &CGH) {
CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope,
NumBytes, Offset);
},
[](const auto &...Args) {
MemoryManager::copy_from_device_global(Args...);
},
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
}

event queue_impl::getLastEvent() const {
std::lock_guard<std::mutex> Lock{MLastEventMtx};
return MDiscardEvents ? createDiscardedEvent() : MLastEvent;
event queue_impl::getLastEvent() {
std::lock_guard<std::mutex> Lock{MMutex};
if (MDiscardEvents)
return createDiscardedEvent();
if (!MGraph.expired() && MGraphLastEventPtr)
return detail::createSyclObjFromImpl<event>(MGraphLastEventPtr);
if (!MLastEventPtr)
MLastEventPtr = std::make_shared<event_impl>(std::nullopt);
return detail::createSyclObjFromImpl<event>(MLastEventPtr);
}

void queue_impl::addEvent(const event &Event) {
Expand Down Expand Up @@ -273,55 +297,90 @@ void queue_impl::addSharedEvent(const event &Event) {
MEventsShared.push_back(Event);
}

template <typename MemOpFuncT, typename... MemOpArgTs>
static bool
areEventsSafeForSchedulerBypass(const std::vector<sycl::event> &DepEvents,
ContextImplPtr Context) {
auto CheckEvent = [&Context](const sycl::event &Event) {
const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
// Events that don't have an initialized context are throwaway events that
// don't represent actual dependencies. Calling getContextImpl() would set
// their context, which we wish to avoid as it is expensive.
if (!SyclEventImplPtr->isContextInitialized() &&
!SyclEventImplPtr->is_host()) {
return true;
}
if (SyclEventImplPtr->is_host()) {
return SyclEventImplPtr->isCompleted();
}
// Cross-context dependencies can't be passed to the backend directly.
if (SyclEventImplPtr->getContextImpl() != Context)
return false;

// A nullptr here means that the commmand does not produce a PI event or it
// hasn't been enqueued yet.
return SyclEventImplPtr->getHandleRef() != nullptr;
};

return std::all_of(DepEvents.begin(), DepEvents.end(),
[&Context, &CheckEvent](const sycl::event &Event) {
return CheckEvent(Event);
});
}

template <typename HandlerFuncT>
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &DepEvents,
HandlerFuncT HandlerFunc) {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
HandlerFunc(CGH);
},
Self, {});
}

template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &DepEvents,
HandlerFuncT HandlerFunc,
MemOpFuncT MemOpFunc,
MemOpArgTs... MemOpArgs) {
if (MHasDiscardEventsSupport) {
MemOpFunc(MemOpArgs..., getOrWaitEvents(DepEvents, MContext),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
return createDiscardedEvent();
}

event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
// We need to submit command and update the last event under same lock if we
// have in-order queue.
{
// We need to submit command and update the last event under same lock if we
// have in-order queue.
auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
: std::unique_lock<std::mutex>();
// If the last submitted command in the in-order queue is host_task then
// wait for it before submitting usm command.
if (isInOrder() && MLastCGType == CG::CGTYPE::CodeplayHostTask)
MLastEvent.wait();
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);

std::vector<event> MutableDepEvents;
const std::vector<event> &ExpandedDepEvents =
getExtendDependencyList(DepEvents, MutableDepEvents);

auto EventImpl = detail::getSyclObjImpl(ResEvent);
MemOpFunc(MemOpArgs..., getOrWaitEvents(ExpandedDepEvents, MContext),
&EventImpl->getHandleRef(), EventImpl);

if (MContext->is_host())
return MDiscardEvents ? createDiscardedEvent() : event();

// When a queue is recorded by a graph, the dependencies are managed in the
// graph implementaton. Additionally, CG recorded for a graph are outside of
// the in-order queue execution sequence. Therefore, these CG must not
// update MLastEvent.
if (isInOrder() && (getCommandGraph() == nullptr)) {
MLastEvent = ResEvent;
// We don't create a command group for usm commands, so set it to None.
// This variable is used to perform explicit dependency management when
// required.
MLastCGType = CG::CGTYPE::None;
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);

if (areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
if (MHasDiscardEventsSupport) {
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
return createDiscardedEvent();
}

event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
auto EventImpl = detail::getSyclObjImpl(ResEvent);
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
&EventImpl->getHandleRef(), EventImpl);

if (MContext->is_host())
return MDiscardEvents ? createDiscardedEvent() : event();

if (isInOrder()) {
auto &EventToStoreIn =
MGraph.lock() ? MGraphLastEventPtr : MLastEventPtr;
EventToStoreIn = EventImpl;
}
// Track only if we won't be able to handle it with piQueueFinish.
if (MEmulateOOO)
addSharedEvent(ResEvent);
return discard_or_return(ResEvent);
}
}
// Track only if we won't be able to handle it with piQueueFinish.
if (MEmulateOOO)
addSharedEvent(ResEvent);
return MDiscardEvents ? createDiscardedEvent() : ResEvent;
return submitWithHandler(Self, DepEvents, HandlerFunc);
}

void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
Expand Down Expand Up @@ -495,9 +554,10 @@ bool queue_impl::ext_oneapi_empty() const {
// If we have in-order queue where events are not discarded then just check
// the status of the last event.
if (isInOrder() && !MDiscardEvents) {
std::lock_guard<std::mutex> Lock(MLastEventMtx);
return MLastEvent.get_info<info::event::command_execution_status>() ==
info::event_command_status::complete;
std::lock_guard<std::mutex> Lock(MMutex);
return !MLastEventPtr ||
MLastEventPtr->get_info<info::event::command_execution_status>() ==
info::event_command_status::complete;
}

// Check the status of the backend queue if this is not a host queue.
Expand Down Expand Up @@ -533,6 +593,12 @@ bool queue_impl::ext_oneapi_empty() const {
return true;
}

event queue_impl::discard_or_return(const event &Event) {
if (!(MDiscardEvents))
return Event;
return createDiscardedEvent();
}

} // namespace detail
} // namespace _V1
} // namespace sycl
Loading