Skip to content

[SYCL] Implement sycl_ext_oneapi_memcpy2d extension #7370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ class CG {
CodeplayInteropTask = 13,
CodeplayHostTask = 14,
AdviseUSM = 15,
Copy2DUSM = 16,
Fill2DUSM = 17,
Memset2DUSM = 18,
};

CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage,
Expand Down Expand Up @@ -394,6 +397,95 @@ class CGBarrier : public CG {
MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {}
};

/// "Copy 2D USM" command group class.
///
/// Holds the parameters of a 2D USM copy: the source and destination
/// pointers, a pitch for each of them, and the width and height of the
/// region. Every other constructor argument is forwarded to the CG base.
class CGCopy2DUSM : public CG {
  // Declaration order below is also the member initialization order.
  void *MSrc;
  void *MDst;
  size_t MSrcPitch;
  size_t MDstPitch;
  size_t MWidth;
  size_t MHeight;

public:
  /// Records the copy parameters and moves the storage, requirement and
  /// event containers into the CG base, tagged with the Copy2DUSM type.
  CGCopy2DUSM(void *Src, void *Dst, size_t SrcPitch, size_t DstPitch,
              size_t Width, size_t Height,
              std::vector<std::vector<char>> ArgsStorage,
              std::vector<detail::AccessorImplPtr> AccStorage,
              std::vector<std::shared_ptr<const void>> SharedPtrStorage,
              std::vector<AccessorImplHost *> Requirements,
              std::vector<detail::EventImplPtr> Events,
              detail::code_location loc = {})
      : CG(Copy2DUSM, std::move(ArgsStorage), std::move(AccStorage),
           std::move(SharedPtrStorage), std::move(Requirements),
           std::move(Events), std::move(loc)),
        MSrc{Src},
        MDst{Dst},
        MSrcPitch{SrcPitch},
        MDstPitch{DstPitch},
        MWidth{Width},
        MHeight{Height} {}

  /// Extent of the copied region.
  size_t getWidth() const { return MWidth; }
  size_t getHeight() const { return MHeight; }

  /// Source pointer and the pitch it was paired with at construction.
  void *getSrc() const { return MSrc; }
  size_t getSrcPitch() const { return MSrcPitch; }

  /// Destination pointer and the pitch it was paired with at construction.
  void *getDst() const { return MDst; }
  size_t getDstPitch() const { return MDstPitch; }
};

/// "Fill 2D USM" command group class.
///
/// Holds the parameters of a 2D USM fill: the pattern bytes, the destination
/// pointer, its pitch, and the width and height of the region. Every other
/// constructor argument is forwarded to the CG base.
class CGFill2DUSM : public CG {
  // Declaration order below is also the member initialization order.
  std::vector<char> MPattern;
  void *MDst;
  size_t MPitch;
  size_t MWidth;
  size_t MHeight;

public:
  /// Takes ownership of the pattern bytes, records the fill parameters and
  /// moves the storage, requirement and event containers into the CG base,
  /// tagged with the Fill2DUSM type.
  CGFill2DUSM(std::vector<char> Pattern, void *DstPtr, size_t Pitch,
              size_t Width, size_t Height,
              std::vector<std::vector<char>> ArgsStorage,
              std::vector<detail::AccessorImplPtr> AccStorage,
              std::vector<std::shared_ptr<const void>> SharedPtrStorage,
              std::vector<AccessorImplHost *> Requirements,
              std::vector<detail::EventImplPtr> Events,
              detail::code_location loc = {})
      : CG(Fill2DUSM, std::move(ArgsStorage), std::move(AccStorage),
           std::move(SharedPtrStorage), std::move(Requirements),
           std::move(Events), std::move(loc)),
        MPattern(std::move(Pattern)),
        MDst{DstPtr},
        MPitch{Pitch},
        MWidth{Width},
        MHeight{Height} {}

  /// Pattern bytes, returned by reference to the copy owned by this object.
  const std::vector<char> &getPattern() const { return MPattern; }

  /// Destination pointer and the pitch it was paired with at construction.
  void *getDst() const { return MDst; }
  size_t getPitch() const { return MPitch; }

  /// Extent of the filled region.
  size_t getWidth() const { return MWidth; }
  size_t getHeight() const { return MHeight; }
};

/// "Memset 2D USM" command group class.
///
/// Holds the parameters of a 2D USM memset: the value to write, the
/// destination pointer, its pitch, and the width and height of the region.
/// Every other constructor argument is forwarded to the CG base.
class CGMemset2DUSM : public CG {
  // Declaration order below is also the member initialization order.
  char MValue;
  void *MDst;
  size_t MPitch;
  size_t MWidth;
  size_t MHeight;

public:
  /// Records the memset parameters and moves the storage, requirement and
  /// event containers into the CG base, tagged with the Memset2DUSM type.
  CGMemset2DUSM(char Value, void *DstPtr, size_t Pitch, size_t Width,
                size_t Height, std::vector<std::vector<char>> ArgsStorage,
                std::vector<detail::AccessorImplPtr> AccStorage,
                std::vector<std::shared_ptr<const void>> SharedPtrStorage,
                std::vector<AccessorImplHost *> Requirements,
                std::vector<detail::EventImplPtr> Events,
                detail::code_location loc = {})
      : CG(Memset2DUSM, std::move(ArgsStorage), std::move(AccStorage),
           std::move(SharedPtrStorage), std::move(Requirements),
           std::move(Events), std::move(loc)),
        MValue{Value},
        MDst{DstPtr},
        MPitch{Pitch},
        MWidth{Width},
        MHeight{Height} {}

  /// Value stored at construction.
  char getValue() const { return MValue; }

  /// Destination pointer and the pitch it was paired with at construction.
  void *getDst() const { return MDst; }
  size_t getPitch() const { return MPitch; }

  /// Extent of the region to set.
  size_t getWidth() const { return MWidth; }
  size_t getHeight() const { return MHeight; }
};

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
3 changes: 3 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,8 @@ _PI_API(piPluginGetLastError)

_PI_API(piTearDown)

_PI_API(piextUSMEnqueueFill2D)
_PI_API(piextUSMEnqueueMemset2D)
_PI_API(piextUSMEnqueueMemcpy2D)

#undef _PI_API
70 changes: 68 additions & 2 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,15 @@
// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also
// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo.
// Both are needed to support sycl_ext_intel_queue_index extension.
// 12.21 Added new piextUSMEnqueueFill2D, piextUSMEnqueueMemset2D, and
// piextUSMEnqueueMemcpy2D functions. Added new
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT,
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query
// descriptors.

#define _PI_H_VERSION_MAJOR 12
#define _PI_H_VERSION_MINOR 20
#define _PI_H_VERSION_MINOR 21

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -335,7 +341,11 @@ typedef enum {
PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080,
// Atomics capabilities extensions
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011,
// Native 2D USM memory operation support
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder, why 0x30000 and not 0x20000, for example? Is it documented anywhere which values should be picked up for new capabilities?

Copy link
Contributor Author

@steffenlarsen steffenlarsen Nov 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we have any rules necessarily. All these values are taken from OpenCL so we normally just try and move around what values are in OpenCL when adding extension values. In theory it should be fine as long as they don't conflict with any values in the same category, but it's good to keep a buffer.

PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001,
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT = 0x30002
} _pi_context_info;

typedef enum {
Expand Down Expand Up @@ -1809,6 +1819,62 @@ __SYCL_EXPORT pi_result piextUSMGetMemAllocInfo(
pi_context context, const void *ptr, pi_mem_alloc_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret);

/// USM 2D fill API
///
/// \param queue is the queue to submit to
/// \param ptr is the ptr to fill
/// \param pitch is the total width of the destination memory including padding
/// \param pattern is a pointer with the bytes of the pattern to set
/// \param pattern_size is the size in bytes of the pattern
/// \param width is width in bytes of each row to fill
/// \param height is the height of the columns to fill
/// \param num_events_in_waitlist is the number of events to wait on
/// \param events_waitlist is an array of events to wait on
/// \param event is the event that represents this operation
__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The similar "buffer" APIs use "Rect" instead of "2D". I think we should unify names if possible (especially for Unified Runtime). Tag @kbenzie

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@smaslov-intel the difference with *Rect though is that they support 2D or 3D, whereas this is only 2D.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it not make sense to add 3D for USM?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3D could make sense, that would be outside of the scope of this extension though.

I'm less sure if combining the 2D and 3D entry points into a rect entry point is the right way to go though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we were to split 2D/3D, then maybe we should do the same to the existing Rect API for buffers.
OK, this discussion belongs in a Unified Runtime issue, I guess.

size_t pitch, size_t pattern_size,
const void *pattern, size_t width,
size_t height,
pi_uint32 num_events_in_waitlist,
const pi_event *events_waitlist,
pi_event *event);

/// USM 2D Memset API
///
/// Declaration only; the definition is not part of this header excerpt.
///
/// \param queue is the queue to submit to
/// \param ptr is the ptr to fill
/// \param pitch is the total width of the destination memory including padding
/// \param value is the value to fill into the region in \p ptr
///        NOTE(review): passed as int; presumably only the low byte is
///        written, as with memset -- confirm against the plugin
///        implementations.
/// \param width is width in bytes of each row to fill
/// \param height is the height of the columns to fill
/// \param num_events_in_waitlist is the number of events to wait on
/// \param events_waitlist is an array of events to wait on
/// \param event is the event that represents this operation
/// \return a pi_result value
__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(
pi_queue queue, void *ptr, size_t pitch, int value, size_t width,
size_t height, pi_uint32 num_events_in_waitlist,
const pi_event *events_waitlist, pi_event *event);

/// USM 2D Memcpy API
///
/// Declaration only; the definition is not part of this header excerpt.
///
/// \param queue is the queue to submit to
/// \param blocking is whether this operation should block the host
/// \param dst_ptr is the location the data will be copied
/// \param dst_pitch is the total width of the destination memory including
/// padding
/// \param src_ptr is the data to be copied
/// \param src_pitch is the total width of the source memory including padding
/// \param width is width in bytes of each row to be copied
/// \param height is the height of the columns to be copied
/// \param num_events_in_waitlist is the number of events to wait on
/// \param events_waitlist is an array of events to wait on
/// \param event is the event that represents this operation
/// \return a pi_result value
__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D(
pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch,
const void *src_ptr, size_t src_pitch, size_t width, size_t height,
pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist,
pi_event *event);

/// API to get Plugin internal data, opaque to SYCL RT. Some devices whose
/// device code is compiled by the host compiler (e.g. CPU emulators) may use it
/// to access some device code functionality implemented in/behind the plugin.
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ using PiDeviceType = ::pi_device_type;
using PiDeviceInfo = ::pi_device_info;
using PiDeviceBinaryType = ::pi_device_binary_type;
using PiContext = ::pi_context;
using PiContextInfo = ::pi_context_info;
using PiProgram = ::pi_program;
using PiKernel = ::pi_kernel;
using PiQueue = ::pi_queue;
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/feature_test.hpp.in
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_ONEAPI_QUEUE_EMPTY 1
#define SYCL_EXT_ONEAPI_USER_DEFINED_REDUCTIONS 1
#define SYCL_EXT_ONEAPI_WEAK_OBJECT 1
#define SYCL_EXT_ONEAPI_MEMCPY2D 1
#cmakedefine01 SYCL_BUILD_PI_CUDA
#if SYCL_BUILD_PI_CUDA
#define SYCL_EXT_ONEAPI_BACKEND_CUDA 1
Expand Down
Loading