Skip to content

[SYCL][ABI-break] Add code_location parameter to the rest of sycl::queue methods #9603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 89 additions & 40 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,11 +480,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// trivially copyable.
/// \param Count is the number of times to fill Pattern into Ptr.
/// \return an event representing fill operation.
template <typename T> event fill(void *Ptr, const T &Pattern, size_t Count) {
// TODO: to add code location as parameter when ABI break is permitted
const detail::code_location CodeLoc("sycl/queue.hpp", "fill", 0, 0);
return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
CodeLoc);
template <typename T>
event fill(void *Ptr, const T &Pattern,
size_t Count _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_CODELOCPARAM(&CodeLoc) must be paired with:

_CODELOCARG(&CodeLoc); between line 485 & 486:

Suggested change
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
template <typename T>
event fill(void *Ptr, const T &Pattern,
size_t Count _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);

If DISABLE_SYCL_INSTRUMENTATION_METADATA is enabled, then the code will not compile due to undefined variable. See proper use in other constructs such as submit


return submit([&](handler &CGH) {
CGH.fill<T>(Ptr, Pattern, Count);
} _CODELOCFW(CodeLoc));
}

/// Fills the specified memory with the specified pattern.
Expand All @@ -496,11 +500,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param DepEvent is an event that specifies the kernel dependencies.
/// \return an event representing fill operation.
template <typename T>
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent) {
event fill(void *Ptr, const T &Pattern, size_t Count,
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit([&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.fill<T>(Ptr, Pattern, Count);
});
} _CODELOCFW(CodeLoc));
}

/// Fills the specified memory with the specified pattern.
Expand All @@ -514,11 +521,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \return an event representing fill operation.
template <typename T>
event fill(void *Ptr, const T &Pattern, size_t Count,
const std::vector<event> &DepEvents) {
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit([&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.fill<T>(Ptr, Pattern, Count);
});
} _CODELOCFW(CodeLoc));
}

/// Fills the memory pointed by a USM pointer with the value specified.
Expand All @@ -530,7 +539,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Value is a value to be set. Value is cast as an unsigned char.
/// \param Count is a number of bytes to fill.
/// \return an event representing fill operation.
event memset(void *Ptr, int Value, size_t Count);
event memset(void *Ptr, int Value, size_t Count _CODELOCPARAM(&CodeLoc));

/// Fills the memory pointed by a USM pointer with the value specified.
/// No operations is done if \param Count is zero. An exception is thrown
Expand All @@ -542,7 +551,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Count is a number of bytes to fill.
/// \param DepEvent is an event that specifies the kernel dependencies.
/// \return an event representing fill operation.
event memset(void *Ptr, int Value, size_t Count, event DepEvent);
event memset(void *Ptr, int Value, size_t Count,
event DepEvent _CODELOCPARAM(&CodeLoc));

/// Fills the memory pointed by a USM pointer with the value specified.
/// No operations is done if \param Count is zero. An exception is thrown
Expand All @@ -556,7 +566,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// dependencies.
/// \return an event representing fill operation.
event memset(void *Ptr, int Value, size_t Count,
const std::vector<event> &DepEvents);
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc));

/// Copies data from one memory region to another, each is either a host
/// pointer or a pointer within USM allocation accessible on the device
Expand All @@ -569,7 +579,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Src is a USM pointer to the source memory.
/// \param Count is a number of bytes to copy.
/// \return an event representing copy operation.
event memcpy(void *Dest, const void *Src, size_t Count);
event memcpy(void *Dest, const void *Src,
size_t Count _CODELOCPARAM(&CodeLoc));

/// Copies data from one memory region to another, each is either a host
/// pointer or a pointer within USM allocation accessible on the device
Expand All @@ -583,7 +594,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Count is a number of bytes to copy.
/// \param DepEvent is an event that specifies the kernel dependencies.
/// \return an event representing copy operation.
event memcpy(void *Dest, const void *Src, size_t Count, event DepEvent);
event memcpy(void *Dest, const void *Src, size_t Count,
event DepEvent _CODELOCPARAM(&CodeLoc));

/// Copies data from one memory region to another, each is either a host
/// pointer or a pointer within USM allocation accessible on the device
Expand All @@ -599,7 +611,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// dependencies.
/// \return an event representing copy operation.
event memcpy(void *Dest, const void *Src, size_t Count,
const std::vector<event> &DepEvents);
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc));

/// Copies data from one memory region to another, each is either a host
/// pointer or a pointer within USM allocation accessible on the device
Expand Down Expand Up @@ -670,7 +682,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Advice is a device-defined advice for the specified allocation.
/// \return an event representing advice operation.
__SYCL2020_DEPRECATED("use the overload with int Advice instead")
event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice);
event mem_advise(const void *Ptr, size_t Length,
pi_mem_advice Advice _CODELOCPARAM(&CodeLoc));

/// Provides additional information to the underlying runtime about how
/// different allocations are used.
Expand All @@ -679,7 +692,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Length is a number of bytes in the allocation.
/// \param Advice is a device-defined advice for the specified allocation.
/// \return an event representing advice operation.
event mem_advise(const void *Ptr, size_t Length, int Advice);
event mem_advise(const void *Ptr, size_t Length,
int Advice _CODELOCPARAM(&CodeLoc));

/// Provides additional information to the underlying runtime about how
/// different allocations are used.
Expand All @@ -689,7 +703,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Advice is a device-defined advice for the specified allocation.
/// \param DepEvent is an event that specifies the kernel dependencies.
/// \return an event representing advice operation.
event mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent);
event mem_advise(const void *Ptr, size_t Length, int Advice,
event DepEvent _CODELOCPARAM(&CodeLoc));

/// Provides additional information to the underlying runtime about how
/// different allocations are used.
Expand All @@ -701,7 +716,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// dependencies.
/// \return an event representing advice operation.
event mem_advise(const void *Ptr, size_t Length, int Advice,
const std::vector<event> &DepEvents);
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc));

/// Provides hints to the runtime library that data should be made available
/// on a device earlier than Unified Shared Memory would normally require it
Expand All @@ -710,10 +725,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Ptr is a USM pointer to the memory to be prefetched to the device.
/// \param Count is a number of bytes to be prefetched.
/// \return an event representing prefetch operation.
event prefetch(const void *Ptr, size_t Count) {
// TODO: to add code location as parameter when ABI break is permitted
const detail::code_location CodeLoc("sycl/queue.hpp", "prefetch", 0, 0);
return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
event prefetch(const void *Ptr, size_t Count _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[=](handler &CGH) { CGH.prefetch(Ptr, Count); } _CODELOCFW(CodeLoc));
}

/// Provides hints to the runtime library that data should be made available
Expand All @@ -724,11 +740,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \param Count is a number of bytes to be prefetched.
/// \param DepEvent is an event that specifies the kernel dependencies.
/// \return an event representing prefetch operation.
event prefetch(const void *Ptr, size_t Count, event DepEvent) {
event prefetch(const void *Ptr, size_t Count,
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit([=](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.prefetch(Ptr, Count);
});
} _CODELOCFW(CodeLoc));
}

/// Provides hints to the runtime library that data should be made available
Expand All @@ -741,11 +760,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// dependencies.
/// \return an event representing prefetch operation.
event prefetch(const void *Ptr, size_t Count,
const std::vector<event> &DepEvents) {
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit([=](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.prefetch(Ptr, Count);
});
} _CODELOCFW(CodeLoc));
}

/// Copies data from one 2D memory region to another, both pointed by
Expand Down Expand Up @@ -1085,7 +1106,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
template <typename T, typename PropertyListT>
event memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
const void *Src, size_t NumBytes, size_t Offset,
const std::vector<event> &DepEvents) {
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this CodeLoc is placed in TLS, it is not being accessed and used in memcpyToDeviceGLobal. Shouldn't we be having a notification in this function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tovinkere this PR is not intended to any any extra instrumentation for XPTI. IT is needed to add code_location in API while we have a chance to do that. Any implementation gaps in terms of xpti notifications are not intended to be included here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KseniyaTikhomirova So, this PR is just ensure CodeLoc is added as a parameter and in the process break ABI during the ABI Breakage window?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

if (sizeof(T) < Offset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy to device_global is out of bounds.");
Expand All @@ -1096,7 +1119,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
return submit([&](handler &CGH) {
CGH.depends_on(DepEvents);
return CGH.memcpy(Dest, Src, NumBytes, Offset);
});
} _CODELOCFW(CodeLoc));
}

constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
Expand All @@ -1120,7 +1143,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
template <typename T, typename PropertyListT>
event memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
const void *Src, size_t NumBytes, size_t Offset,
event DepEvent) {
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, NumBytes, Offset,
std::vector<event>{DepEvent});
}
Expand All @@ -1138,7 +1163,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
template <typename T, typename PropertyListT>
event memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
const void *Src, size_t NumBytes = sizeof(T),
size_t Offset = 0) {
size_t Offset = 0 _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
}

Expand All @@ -1158,7 +1185,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event
memcpy(void *Dest,
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents) {
size_t NumBytes, size_t Offset,
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
if (sizeof(T) < Offset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy from device_global is out of bounds.");
Expand Down Expand Up @@ -1194,7 +1224,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event
memcpy(void *Dest,
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
size_t NumBytes, size_t Offset, event DepEvent) {
size_t NumBytes, size_t Offset,
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, NumBytes, Offset,
std::vector<event>{DepEvent});
}
Expand All @@ -1213,7 +1246,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event
memcpy(void *Dest,
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
size_t NumBytes = sizeof(T), size_t Offset = 0) {
size_t NumBytes = sizeof(T),
size_t Offset = 0 _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
}

Expand All @@ -1234,7 +1270,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event copy(const std::remove_all_extents_t<T> *Src,
ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
size_t Count, size_t StartIndex,
const std::vector<event> &DepEvents) {
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>),
DepEvents);
Expand All @@ -1256,7 +1294,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
template <typename T, typename PropertyListT>
event copy(const std::remove_all_extents_t<T> *Src,
ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
size_t Count, size_t StartIndex, event DepEvent) {
size_t Count, size_t StartIndex,
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>),
DepEvent);
Expand All @@ -1277,7 +1318,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event copy(const std::remove_all_extents_t<T> *Src,
ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
size_t StartIndex = 0) {
size_t StartIndex = 0 _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>));
}
Expand All @@ -1299,7 +1342,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event
copy(const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
const std::vector<event> &DepEvents) {
const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>),
DepEvents);
Expand All @@ -1322,7 +1367,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
event
copy(const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
event DepEvent) {
event DepEvent _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>),
DepEvent);
Expand All @@ -1344,7 +1391,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
copy(const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
std::remove_all_extents_t<T> *Dest,
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
size_t StartIndex = 0) {
size_t StartIndex = 0 _CODELOCPARAM(&CodeLoc)) {
_CODELOCARG(&CodeLoc);
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>));
}
Expand Down
Loading