Skip to content

Commit 9332d1a

Browse files
authored
[SYCL][COMPAT] Migrate currently unsupported memcpy_parameter overloads (#14039)
This PR adds `syclcompat::experimental::memcpy` and `syclcompat::experimental::memcpy_async` taking `memcpy_parameter` arguments which wrap either: - deprecated `sycl::image` - experimental bindless images Since image support was decided to be out of scope for 2025.0, these APIs are introduced for forward compatibility only, and currently throw a `std::runtime_error`. --------- Signed-off-by: Joe Todd <[email protected]>
1 parent 6ac0a3f commit 9332d1a

File tree

6 files changed

+603
-38
lines changed

6 files changed

+603
-38
lines changed

sycl/doc/syclcompat/README.md

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ static void destroy_event(event_ptr event);
399399
} // syclcompat
400400
```
401401
402-
### Memory Allocation
402+
### Memory Operations
403403
404404
This library provides interfaces to allocate memory to be accessed within kernel
405405
functions and on the host. The `syclcompat::malloc` function allocates device
@@ -510,6 +510,64 @@ public:
510510
} // syclcompat
511511
```
512512

513+
The `syclcompat::experimental` namespace contains currently unsupported `memcpy` overloads which take a `syclcompat::experimental::memcpy_parameter` argument. These are included for forwards compatibility and currently throw a `std::runtime_error`.
514+
515+
```cpp
516+
namespace syclcompat {
517+
namespace experimental {
518+
// Forward declarations for types relating to unsupported memcpy_parameter API:
519+
520+
enum memcpy_direction {
521+
host_to_host,
522+
host_to_device,
523+
device_to_host,
524+
device_to_device,
525+
automatic
526+
};
527+
528+
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
529+
class image_mem_wrapper;
530+
#endif
531+
class image_matrix;
532+
533+
/// Memory copy parameters for 2D/3D memory data.
534+
struct memcpy_parameter {
535+
struct data_wrapper {
536+
pitched_data pitched{};
537+
sycl::id<3> pos{};
538+
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
539+
experimental::image_mem_wrapper *image_bindless{nullptr};
540+
#endif
541+
image_matrix *image{nullptr};
542+
};
543+
data_wrapper from{};
544+
data_wrapper to{};
545+
sycl::range<3> size{};
546+
syclcompat::detail::memcpy_direction direction{syclcompat::detail::memcpy_direction::automatic};
547+
};
548+
549+
/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
550+
/// The function will return after the copy is completed.
551+
///
552+
/// \param param Memory copy parameters.
553+
/// \param q Queue to execute the copy task.
554+
/// \returns no return value.
555+
static inline void memcpy(const memcpy_parameter &param,
556+
sycl::queue q = get_default_queue());
557+
558+
/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
559+
/// . The return of the function does NOT guarantee the copy is completed.
560+
///
561+
/// \param param Memory copy parameters.
562+
/// \param q Queue to execute the copy task.
563+
/// \returns no return value.
564+
static inline void memcpy_async(const memcpy_parameter &param,
565+
sycl::queue q = get_default_queue());
566+
567+
} // namespace experimental
568+
} // namespace syclcompat
569+
```
570+
513571
Finally, the class `pitched_data`, which manages memory allocation for 3D
514572
spaces, padded to avoid uncoalesced memory accesses.
515573

sycl/include/syclcompat/memory.hpp

Lines changed: 143 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,15 @@ template <typename AllocT> auto *local_mem() {
7777
return As;
7878
}
7979

80-
namespace detail {
80+
namespace experimental {
8181
enum memcpy_direction {
8282
host_to_host,
8383
host_to_device,
8484
device_to_host,
8585
device_to_device,
8686
automatic
8787
};
88-
} // namespace detail
88+
}
8989

9090
enum class memory_region {
9191
global = 0, // device global memory
@@ -122,6 +122,42 @@ class pitched_data {
122122
size_t _pitch, _x, _y;
123123
};
124124

125+
namespace experimental {
126+
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
127+
class image_mem_wrapper;
128+
namespace detail {
129+
static sycl::event memcpy(const image_mem_wrapper *src,
130+
const sycl::id<3> &src_id, pitched_data &dest,
131+
const sycl::id<3> &dest_id,
132+
const sycl::range<3> &copy_extend, sycl::queue q);
133+
static sycl::event memcpy(const pitched_data src, const sycl::id<3> &src_id,
134+
image_mem_wrapper *dest, const sycl::id<3> &dest_id,
135+
const sycl::range<3> &copy_extend, sycl::queue q);
136+
} // namespace detail
137+
#endif
138+
class image_matrix;
139+
namespace detail {
140+
static pitched_data to_pitched_data(image_matrix *image);
141+
}
142+
143+
/// Memory copy parameters for 2D/3D memory data.
144+
struct memcpy_parameter {
145+
struct data_wrapper {
146+
pitched_data pitched{};
147+
sycl::id<3> pos{};
148+
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
149+
experimental::image_mem_wrapper *image_bindless{nullptr};
150+
#endif
151+
image_matrix *image{nullptr};
152+
};
153+
data_wrapper from{};
154+
data_wrapper to{};
155+
sycl::range<3> size{};
156+
syclcompat::experimental::memcpy_direction direction{
157+
syclcompat::experimental::memcpy_direction::automatic};
158+
};
159+
} // namespace experimental
160+
125161
namespace detail {
126162

127163
template <class T, memory_region Memory, size_t Dimension> class accessor;
@@ -263,21 +299,16 @@ static pointer_access_attribute get_pointer_attribute(sycl::queue q,
263299
}
264300
}
265301

266-
static memcpy_direction deduce_memcpy_direction(sycl::queue q, void *to_ptr,
267-
const void *from_ptr) {
302+
static experimental::memcpy_direction
303+
deduce_memcpy_direction(sycl::queue q, void *to_ptr, const void *from_ptr) {
268304
// table[to_attribute][from_attribute]
305+
using namespace experimental; // for memcpy_direction
269306
static const memcpy_direction
270307
direction_table[static_cast<unsigned>(pointer_access_attribute::end)]
271308
[static_cast<unsigned>(pointer_access_attribute::end)] = {
272-
{memcpy_direction::host_to_host,
273-
memcpy_direction::device_to_host,
274-
memcpy_direction::host_to_host},
275-
{memcpy_direction::host_to_device,
276-
memcpy_direction::device_to_device,
277-
memcpy_direction::device_to_device},
278-
{memcpy_direction::host_to_host,
279-
memcpy_direction::device_to_device,
280-
memcpy_direction::device_to_device}};
309+
{host_to_host, device_to_host, host_to_host},
310+
{host_to_device, device_to_device, device_to_device},
311+
{host_to_host, device_to_device, device_to_device}};
281312
return direction_table[static_cast<unsigned>(get_pointer_attribute(
282313
q, to_ptr))][static_cast<unsigned>(get_pointer_attribute(q, from_ptr))];
283314
}
@@ -300,35 +331,36 @@ static inline size_t get_offset(sycl::id<3> id, size_t slice, size_t pitch) {
300331
return slice * id.get(2) + pitch * id.get(1) + id.get(0);
301332
}
302333

334+
// RAII for host pointer
335+
class host_buffer {
336+
void *_buf;
337+
size_t _size;
338+
sycl::queue _q;
339+
const std::vector<sycl::event> &_deps; // free operation depends
340+
341+
public:
342+
host_buffer(size_t size, sycl::queue q, const std::vector<sycl::event> &deps)
343+
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
344+
void *get_ptr() const { return _buf; }
345+
size_t get_size() const { return _size; }
346+
~host_buffer() {
347+
if (_buf) {
348+
_q.submit([&](sycl::handler &cgh) {
349+
cgh.depends_on(_deps);
350+
cgh.host_task([buf = _buf] { std::free(buf); });
351+
});
352+
}
353+
}
354+
};
355+
303356
/// copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr
304357
/// and \p from_range to another specified by \p to_ptr and \p to_range.
305358
static inline std::vector<sycl::event>
306359
memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
307360
sycl::range<3> to_range, sycl::range<3> from_range, sycl::id<3> to_id,
308361
sycl::id<3> from_id, sycl::range<3> size,
309362
const std::vector<sycl::event> &dep_events = {}) {
310-
// RAII for host pointer
311-
class host_buffer {
312-
void *_buf;
313-
size_t _size;
314-
sycl::queue _q;
315-
const std::vector<sycl::event> &_deps; // free operation depends
316-
317-
public:
318-
host_buffer(size_t size, sycl::queue q,
319-
const std::vector<sycl::event> &deps)
320-
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
321-
void *get_ptr() const { return _buf; }
322-
size_t get_size() const { return _size; }
323-
~host_buffer() {
324-
if (_buf) {
325-
_q.submit([&](sycl::handler &cgh) {
326-
cgh.depends_on(_deps);
327-
cgh.host_task([buf = _buf] { std::free(buf); });
328-
});
329-
}
330-
}
331-
};
363+
332364
std::vector<sycl::event> event_list;
333365

334366
size_t to_slice = to_range.get(1) * to_range.get(0);
@@ -343,6 +375,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
343375
return {memcpy(q, to_surface, from_surface, to_slice * size.get(2),
344376
dep_events)};
345377
}
378+
using namespace experimental; // for memcpy_direction
346379
memcpy_direction direction = deduce_memcpy_direction(q, to_ptr, from_ptr);
347380
size_t size_slice = size.get(1) * size.get(0);
348381
switch (direction) {
@@ -448,6 +481,56 @@ static sycl::event combine_events(std::vector<sycl::event> &events,
448481

449482
} // namespace detail
450483

484+
namespace experimental {
485+
namespace detail {
486+
static inline std::vector<sycl::event>
487+
memcpy(sycl::queue q, const experimental::memcpy_parameter &param) {
488+
auto to = param.to.pitched;
489+
auto from = param.from.pitched;
490+
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
491+
if (param.to.image_bindless != nullptr &&
492+
param.from.image_bindless != nullptr) {
493+
throw std::runtime_error(
494+
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
495+
// TODO: Need change logic when sycl support image_mem to image_mem copy.
496+
std::vector<sycl::event> event_list;
497+
syclcompat::detail::host_buffer buf(param.size.size(), q, event_list);
498+
to.set_data_ptr(buf.get_ptr());
499+
experimental::detail::memcpy(param.from.image_bindless, param.from.pos, to,
500+
sycl::id<3>(0, 0, 0), param.size, q);
501+
from.set_data_ptr(buf.get_ptr());
502+
event_list.push_back(experimental::detail::memcpy(
503+
from, sycl::id<3>(0, 0, 0), param.to.image_bindless, param.to.pos,
504+
param.size, q));
505+
return event_list;
506+
} else if (param.to.image_bindless != nullptr) {
507+
throw std::runtime_error(
508+
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
509+
return {experimental::detail::memcpy(from, param.from.pos,
510+
param.to.image_bindless, param.to.pos,
511+
param.size, q)};
512+
} else if (param.from.image_bindless != nullptr) {
513+
throw std::runtime_error(
514+
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
515+
return {experimental::detail::memcpy(param.from.image_bindless,
516+
param.from.pos, to, param.to.pos,
517+
param.size, q)};
518+
}
519+
#endif
520+
if (param.to.image != nullptr) {
521+
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
522+
to = experimental::detail::to_pitched_data(param.to.image);
523+
}
524+
if (param.from.image != nullptr) {
525+
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
526+
from = experimental::detail::to_pitched_data(param.from.image);
527+
}
528+
return syclcompat::detail::memcpy(q, to, param.to.pos, from, param.from.pos,
529+
param.size);
530+
}
531+
} // namespace detail
532+
} // namespace experimental
533+
451534
/// Allocate memory block on the device.
452535
/// \param num_bytes Number of bytes to allocate.
453536
/// \param q Queue to execute the allocate task.
@@ -757,6 +840,31 @@ static sycl::event inline fill_async(void *dev_ptr, const T &pattern,
757840
return detail::fill(q, dev_ptr, pattern, count);
758841
}
759842

843+
namespace experimental {
844+
845+
/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
846+
/// The function will return after the copy is completed.
847+
///
848+
/// \param param Memory copy parameters.
849+
/// \param q Queue to execute the copy task.
850+
/// \returns no return value.
851+
static inline void memcpy(const memcpy_parameter &param,
852+
sycl::queue q = get_default_queue()) {
853+
sycl::event::wait(syclcompat::experimental::detail::memcpy(q, param));
854+
}
855+
856+
/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
857+
/// . The return of the function does NOT guarantee the copy is completed.
858+
///
859+
/// \param param Memory copy parameters.
860+
/// \param q Queue to execute the copy task.
861+
/// \returns no return value.
862+
static inline void memcpy_async(const memcpy_parameter &param,
863+
sycl::queue q = get_default_queue()) {
864+
syclcompat::experimental::detail::memcpy(q, param);
865+
}
866+
} // namespace experimental
867+
760868
/// Synchronously sets \p value to the first \p size bytes starting from \p
761869
/// dev_ptr. The function will return after the memset operation is completed.
762870
///

0 commit comments

Comments
 (0)