Skip to content

Commit 516d411

Browse files
[SYCL] Implement sycl_ext_oneapi_memcpy2d extension (#7370)
This commit adds an implementation of the sycl_ext_oneapi_memcpy2d extension. This includes the following: * Three new PI API functions: piextUSMEnqueueFill2D, piextUSMEnqueueMemset2D, and piextUSMEnqueueMemcpy2D. * Three new PI context queries: PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT, and PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT. Each of these returns a boolean value indicating whether the PI context supports the corresponding new API function. * New handler member functions: ext_oneapi_memcpy2d, ext_oneapi_copy2d, ext_oneapi_memset2d, and ext_oneapi_fill2d. These will create new commands which will use the new PI commands if they are supported. If the new PI API functions are not supported, these member functions will launch auxiliary kernels to do the required work. * New queue shortcuts for each of the new handler members. Signed-off-by: Larsen, Steffen <[email protected]> Co-authored-by: Petr Vesely <[email protected]>
1 parent 7170b23 commit 516d411

24 files changed

+1973
-6
lines changed

sycl/include/sycl/detail/cg.hpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ class CG {
6969
CodeplayInteropTask = 13,
7070
CodeplayHostTask = 14,
7171
AdviseUSM = 15,
72+
Copy2DUSM = 16,
73+
Fill2DUSM = 17,
74+
Memset2DUSM = 18,
7275
};
7376

7477
CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage,
@@ -394,6 +397,95 @@ class CGBarrier : public CG {
394397
MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {}
395398
};
396399

400+
/// "Copy 2D USM" command group class.
///
/// Command group of type Copy2DUSM that records the arguments of a 2D USM
/// copy: the source and destination pointers, the pitch of each, and the
/// width and height of the copied region. The pointers are kept as raw
/// `void *` values; nothing in this class allocates, frees, or dereferences
/// them.
/// NOTE(review): the width and pitches are presumably in bytes and the height
/// in rows, as documented for piextUSMEnqueueMemcpy2D -- confirm against the
/// code that constructs this command group.
401+
class CGCopy2DUSM : public CG {
402+
// Recorded copy arguments; set once in the constructor and only read through
// the accessors below.
void *MSrc;
403+
void *MDst;
404+
size_t MSrcPitch;
405+
size_t MDstPitch;
406+
size_t MWidth;
407+
size_t MHeight;
408+
409+
public:
410+
/// Builds the command group. The copy arguments are ordered source-first
/// (Src, Dst, SrcPitch, DstPitch), followed by Width and Height. The
/// storage, requirement, and event vectors, as well as \p loc, are taken by
/// value and moved into the CG base class.
CGCopy2DUSM(void *Src, void *Dst, size_t SrcPitch, size_t DstPitch,
411+
size_t Width, size_t Height,
412+
std::vector<std::vector<char>> ArgsStorage,
413+
std::vector<detail::AccessorImplPtr> AccStorage,
414+
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
415+
std::vector<AccessorImplHost *> Requirements,
416+
std::vector<detail::EventImplPtr> Events,
417+
detail::code_location loc = {})
418+
: CG(Copy2DUSM, std::move(ArgsStorage), std::move(AccStorage),
419+
std::move(SharedPtrStorage), std::move(Requirements),
420+
std::move(Events), std::move(loc)),
421+
MSrc(Src), MDst(Dst), MSrcPitch(SrcPitch), MDstPitch(DstPitch),
422+
MWidth(Width), MHeight(Height) {}
423+
424+
// Read-only accessors returning the values passed to the constructor.
void *getSrc() const { return MSrc; }
425+
void *getDst() const { return MDst; }
426+
size_t getSrcPitch() const { return MSrcPitch; }
427+
size_t getDstPitch() const { return MDstPitch; }
428+
size_t getWidth() const { return MWidth; }
429+
size_t getHeight() const { return MHeight; }
430+
};
431+
432+
/// "Fill 2D USM" command group class.
///
/// Command group of type Fill2DUSM that records the arguments of a 2D USM
/// fill: the pattern bytes, the destination pointer, its pitch, and the width
/// and height of the filled region. The pattern is owned by this object (it
/// is stored in a std::vector<char>); the destination is kept as a raw
/// `void *` that this class neither allocates, frees, nor dereferences.
/// NOTE(review): the width and pitch are presumably in bytes and the height in
/// rows, as documented for piextUSMEnqueueFill2D -- confirm against the code
/// that constructs this command group.
433+
class CGFill2DUSM : public CG {
434+
// Recorded fill arguments; set once in the constructor and only read through
// the accessors below.
std::vector<char> MPattern;
435+
void *MDst;
436+
size_t MPitch;
437+
size_t MWidth;
438+
size_t MHeight;
439+
440+
public:
441+
/// Builds the command group. \p Pattern is taken by value and moved into the
/// object. The storage, requirement, and event vectors, as well as \p loc,
/// are taken by value and moved into the CG base class.
CGFill2DUSM(std::vector<char> Pattern, void *DstPtr, size_t Pitch,
442+
size_t Width, size_t Height,
443+
std::vector<std::vector<char>> ArgsStorage,
444+
std::vector<detail::AccessorImplPtr> AccStorage,
445+
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
446+
std::vector<AccessorImplHost *> Requirements,
447+
std::vector<detail::EventImplPtr> Events,
448+
detail::code_location loc = {})
449+
: CG(Fill2DUSM, std::move(ArgsStorage), std::move(AccStorage),
450+
std::move(SharedPtrStorage), std::move(Requirements),
451+
std::move(Events), std::move(loc)),
452+
MPattern(std::move(Pattern)), MDst(DstPtr), MPitch(Pitch),
453+
MWidth(Width), MHeight(Height) {}
454+
// Read-only accessors returning the values passed to the constructor.
void *getDst() const { return MDst; }
455+
size_t getPitch() const { return MPitch; }
456+
size_t getWidth() const { return MWidth; }
457+
size_t getHeight() const { return MHeight; }
458+
// Returns a reference to the stored pattern rather than a copy.
const std::vector<char> &getPattern() const { return MPattern; }
459+
};
460+
461+
/// "Memset 2D USM" command group class.
///
/// Command group of type Memset2DUSM that records the arguments of a 2D USM
/// memset: the byte value, the destination pointer, its pitch, and the width
/// and height of the region to set. The destination is kept as a raw
/// `void *` that this class neither allocates, frees, nor dereferences.
/// The value is stored as a `char` here, whereas the piextUSMEnqueueMemset2D
/// declaration takes it as an `int`.
/// NOTE(review): the width and pitch are presumably in bytes and the height in
/// rows, as documented for piextUSMEnqueueMemset2D -- confirm against the code
/// that constructs this command group.
462+
class CGMemset2DUSM : public CG {
463+
// Recorded memset arguments; set once in the constructor and only read
// through the accessors below.
char MValue;
464+
void *MDst;
465+
size_t MPitch;
466+
size_t MWidth;
467+
size_t MHeight;
468+
469+
public:
470+
/// Builds the command group. The storage, requirement, and event vectors, as
/// well as \p loc, are taken by value and moved into the CG base class.
CGMemset2DUSM(char Value, void *DstPtr, size_t Pitch, size_t Width,
471+
size_t Height, std::vector<std::vector<char>> ArgsStorage,
472+
std::vector<detail::AccessorImplPtr> AccStorage,
473+
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
474+
std::vector<AccessorImplHost *> Requirements,
475+
std::vector<detail::EventImplPtr> Events,
476+
detail::code_location loc = {})
477+
: CG(Memset2DUSM, std::move(ArgsStorage), std::move(AccStorage),
478+
std::move(SharedPtrStorage), std::move(Requirements),
479+
std::move(Events), std::move(loc)),
480+
MValue(Value), MDst(DstPtr), MPitch(Pitch), MWidth(Width),
481+
MHeight(Height) {}
482+
// Read-only accessors returning the values passed to the constructor.
void *getDst() const { return MDst; }
483+
size_t getPitch() const { return MPitch; }
484+
size_t getWidth() const { return MWidth; }
485+
size_t getHeight() const { return MHeight; }
486+
char getValue() const { return MValue; }
487+
};
488+
397489
} // namespace detail
398490
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
399491
} // namespace sycl

sycl/include/sycl/detail/pi.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,5 +141,8 @@ _PI_API(piPluginGetLastError)
141141

142142
_PI_API(piTearDown)
143143

144+
_PI_API(piextUSMEnqueueFill2D)
145+
_PI_API(piextUSMEnqueueMemset2D)
146+
_PI_API(piextUSMEnqueueMemcpy2D)
144147

145148
#undef _PI_API

sycl/include/sycl/detail/pi.h

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,15 @@
6868
// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also
6969
// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo.
7070
// Both are needed to support sycl_ext_intel_queue_index extension.
71+
// 12.21 Added new piextUSMEnqueueFill2D, piextUSMEnqueueMemset2D, and
72+
// piextUSMEnqueueMemcpy2D functions. Added new
73+
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT,
74+
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and
75+
// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query
76+
// descriptors.
7177

7278
#define _PI_H_VERSION_MAJOR 12
73-
#define _PI_H_VERSION_MINOR 20
79+
#define _PI_H_VERSION_MINOR 21
7480

7581
#define _PI_STRING_HELPER(a) #a
7682
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -335,7 +341,11 @@ typedef enum {
335341
PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080,
336342
// Atomics capabilities extensions
337343
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
338-
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011
344+
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011,
345+
// Native 2D USM memory operation support
346+
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000,
347+
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001,
348+
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT = 0x30002
339349
} _pi_context_info;
340350

341351
typedef enum {
@@ -1809,6 +1819,62 @@ __SYCL_EXPORT pi_result piextUSMGetMemAllocInfo(
18091819
pi_context context, const void *ptr, pi_mem_alloc_info param_name,
18101820
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
18111821

1822+
/// USM 2D fill API
1823+
///
1824+
/// \param queue is the queue to submit to
1825+
/// \param ptr is the ptr to fill
1826+
/// \param pitch is the total width of the destination memory including padding
1827+
/// \param pattern is a pointer to the bytes of the pattern to set
1828+
/// \param pattern_size is the size in bytes of the pattern
1829+
/// \param width is width in bytes of each row to fill
1830+
/// \param height is the height (number of rows) of the region to fill
1831+
/// \param num_events_in_waitlist is the number of events to wait on
1832+
/// \param events_waitlist is an array of events to wait on
1833+
/// \param event is the event that represents this operation
1834+
__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr,
1835+
size_t pitch, size_t pattern_size,
1836+
const void *pattern, size_t width,
1837+
size_t height,
1838+
pi_uint32 num_events_in_waitlist,
1839+
const pi_event *events_waitlist,
1840+
pi_event *event);
1841+
1842+
/// USM 2D Memset API
1843+
///
1844+
/// \param queue is the queue to submit to
1845+
/// \param ptr is the ptr to fill
1846+
/// \param pitch is the total width of the destination memory including padding
1847+
/// \param value is the value to fill into the region pointed to by \p ptr
1848+
/// \param width is width in bytes of each row to fill
1849+
/// \param height is the height (number of rows) of the region to fill
1850+
/// \param num_events_in_waitlist is the number of events to wait on
1851+
/// \param events_waitlist is an array of events to wait on
1852+
/// \param event is the event that represents this operation
1853+
__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(
1854+
pi_queue queue, void *ptr, size_t pitch, int value, size_t width,
1855+
size_t height, pi_uint32 num_events_in_waitlist,
1856+
const pi_event *events_waitlist, pi_event *event);
1857+
1858+
/// USM 2D Memcpy API
1859+
///
1860+
/// \param queue is the queue to submit to
1861+
/// \param blocking is whether this operation should block the host
1862+
/// \param dst_ptr is the location the data will be copied to
1863+
/// \param dst_pitch is the total width of the destination memory including
1864+
/// padding
1865+
/// \param src_ptr is the data to be copied
1866+
/// \param src_pitch is the total width of the source memory including padding
1867+
/// \param width is width in bytes of each row to be copied
1868+
/// \param height is the height (number of rows) of the region to be copied
1869+
/// \param num_events_in_waitlist is the number of events to wait on
1870+
/// \param events_waitlist is an array of events to wait on
1871+
/// \param event is the event that represents this operation
1872+
__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D(
1873+
pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch,
1874+
const void *src_ptr, size_t src_pitch, size_t width, size_t height,
1875+
pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist,
1876+
pi_event *event);
1877+
18121878
/// API to get Plugin internal data, opaque to SYCL RT. Some devices whose
18131879
/// device code is compiled by the host compiler (e.g. CPU emulators) may use it
18141880
/// to access some device code functionality implemented in/behind the plugin.

sycl/include/sycl/detail/pi.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ using PiDeviceType = ::pi_device_type;
112112
using PiDeviceInfo = ::pi_device_info;
113113
using PiDeviceBinaryType = ::pi_device_binary_type;
114114
using PiContext = ::pi_context;
115+
using PiContextInfo = ::pi_context_info;
115116
using PiProgram = ::pi_program;
116117
using PiKernel = ::pi_kernel;
117118
using PiQueue = ::pi_queue;

sycl/include/sycl/feature_test.hpp.in

100755100644
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
7171
#define SYCL_EXT_ONEAPI_QUEUE_EMPTY 1
7272
#define SYCL_EXT_ONEAPI_USER_DEFINED_REDUCTIONS 1
7373
#define SYCL_EXT_ONEAPI_WEAK_OBJECT 1
74+
#define SYCL_EXT_ONEAPI_MEMCPY2D 1
7475
#cmakedefine01 SYCL_BUILD_PI_CUDA
7576
#if SYCL_BUILD_PI_CUDA
7677
#define SYCL_EXT_ONEAPI_BACKEND_CUDA 1

0 commit comments

Comments
 (0)