@@ -77,15 +77,15 @@ template <typename AllocT> auto *local_mem() {
77
77
return As;
78
78
}
79
79
80
- namespace detail {
80
+ namespace experimental {
81
81
enum memcpy_direction {
82
82
host_to_host,
83
83
host_to_device,
84
84
device_to_host,
85
85
device_to_device,
86
86
automatic
87
87
};
88
- } // namespace detail
88
+ }
89
89
90
90
enum class memory_region {
91
91
global = 0 , // device global memory
@@ -122,6 +122,42 @@ class pitched_data {
122
122
size_t _pitch, _x, _y;
123
123
};
124
124
125
+ namespace experimental {
126
+ #ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
127
+ class image_mem_wrapper ;
128
+ namespace detail {
129
+ static sycl::event memcpy (const image_mem_wrapper *src,
130
+ const sycl::id<3 > &src_id, pitched_data &dest,
131
+ const sycl::id<3 > &dest_id,
132
+ const sycl::range<3 > ©_extend, sycl::queue q);
133
+ static sycl::event memcpy (const pitched_data src, const sycl::id<3 > &src_id,
134
+ image_mem_wrapper *dest, const sycl::id<3 > &dest_id,
135
+ const sycl::range<3 > ©_extend, sycl::queue q);
136
+ } // namespace detail
137
+ #endif
138
+ class image_matrix ;
139
+ namespace detail {
140
+ static pitched_data to_pitched_data (image_matrix *image);
141
+ }
142
+
143
+ // / Memory copy parameters for 2D/3D memory data.
144
+ struct memcpy_parameter {
145
+ struct data_wrapper {
146
+ pitched_data pitched{};
147
+ sycl::id<3 > pos{};
148
+ #ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
149
+ experimental::image_mem_wrapper *image_bindless{nullptr };
150
+ #endif
151
+ image_matrix *image{nullptr };
152
+ };
153
+ data_wrapper from{};
154
+ data_wrapper to{};
155
+ sycl::range<3 > size{};
156
+ syclcompat::experimental::memcpy_direction direction{
157
+ syclcompat::experimental::memcpy_direction::automatic};
158
+ };
159
+ } // namespace experimental
160
+
125
161
namespace detail {
126
162
127
163
template <class T , memory_region Memory, size_t Dimension> class accessor ;
@@ -263,21 +299,16 @@ static pointer_access_attribute get_pointer_attribute(sycl::queue q,
263
299
}
264
300
}
265
301
266
- static memcpy_direction deduce_memcpy_direction (sycl::queue q, void *to_ptr,
267
- const void *from_ptr) {
302
+ static experimental::memcpy_direction
303
+ deduce_memcpy_direction (sycl::queue q, void *to_ptr, const void *from_ptr) {
268
304
// table[to_attribute][from_attribute]
305
+ using namespace experimental ; // for memcpy_direction
269
306
static const memcpy_direction
270
307
direction_table[static_cast <unsigned >(pointer_access_attribute::end)]
271
308
[static_cast <unsigned >(pointer_access_attribute::end)] = {
272
- {memcpy_direction::host_to_host,
273
- memcpy_direction::device_to_host,
274
- memcpy_direction::host_to_host},
275
- {memcpy_direction::host_to_device,
276
- memcpy_direction::device_to_device,
277
- memcpy_direction::device_to_device},
278
- {memcpy_direction::host_to_host,
279
- memcpy_direction::device_to_device,
280
- memcpy_direction::device_to_device}};
309
+ {host_to_host, device_to_host, host_to_host},
310
+ {host_to_device, device_to_device, device_to_device},
311
+ {host_to_host, device_to_device, device_to_device}};
281
312
return direction_table[static_cast <unsigned >(get_pointer_attribute (
282
313
q, to_ptr))][static_cast <unsigned >(get_pointer_attribute (q, from_ptr))];
283
314
}
@@ -300,35 +331,36 @@ static inline size_t get_offset(sycl::id<3> id, size_t slice, size_t pitch) {
300
331
return slice * id.get (2 ) + pitch * id.get (1 ) + id.get (0 );
301
332
}
302
333
334
+ // RAII for host pointer
335
+ class host_buffer {
336
+ void *_buf;
337
+ size_t _size;
338
+ sycl::queue _q;
339
+ const std::vector<sycl::event> &_deps; // free operation depends
340
+
341
+ public:
342
+ host_buffer (size_t size, sycl::queue q, const std::vector<sycl::event> &deps)
343
+ : _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
344
+ void *get_ptr () const { return _buf; }
345
+ size_t get_size () const { return _size; }
346
+ ~host_buffer () {
347
+ if (_buf) {
348
+ _q.submit ([&](sycl::handler &cgh) {
349
+ cgh.depends_on (_deps);
350
+ cgh.host_task ([buf = _buf] { std::free (buf); });
351
+ });
352
+ }
353
+ }
354
+ };
355
+
303
356
// / copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr
304
357
// / and \p from_range to another specified by \p to_ptr and \p to_range.
305
358
static inline std::vector<sycl::event>
306
359
memcpy (sycl::queue q, void *to_ptr, const void *from_ptr,
307
360
sycl::range<3 > to_range, sycl::range<3 > from_range, sycl::id<3 > to_id,
308
361
sycl::id<3 > from_id, sycl::range<3 > size,
309
362
const std::vector<sycl::event> &dep_events = {}) {
310
- // RAII for host pointer
311
- class host_buffer {
312
- void *_buf;
313
- size_t _size;
314
- sycl::queue _q;
315
- const std::vector<sycl::event> &_deps; // free operation depends
316
-
317
- public:
318
- host_buffer (size_t size, sycl::queue q,
319
- const std::vector<sycl::event> &deps)
320
- : _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
321
- void *get_ptr () const { return _buf; }
322
- size_t get_size () const { return _size; }
323
- ~host_buffer () {
324
- if (_buf) {
325
- _q.submit ([&](sycl::handler &cgh) {
326
- cgh.depends_on (_deps);
327
- cgh.host_task ([buf = _buf] { std::free (buf); });
328
- });
329
- }
330
- }
331
- };
363
+
332
364
std::vector<sycl::event> event_list;
333
365
334
366
size_t to_slice = to_range.get (1 ) * to_range.get (0 );
@@ -343,6 +375,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
343
375
return {memcpy (q, to_surface, from_surface, to_slice * size.get (2 ),
344
376
dep_events)};
345
377
}
378
+ using namespace experimental ; // for memcpy_direction
346
379
memcpy_direction direction = deduce_memcpy_direction (q, to_ptr, from_ptr);
347
380
size_t size_slice = size.get (1 ) * size.get (0 );
348
381
switch (direction) {
@@ -448,6 +481,56 @@ static sycl::event combine_events(std::vector<sycl::event> &events,
448
481
449
482
} // namespace detail
450
483
484
+ namespace experimental {
485
+ namespace detail {
486
+ static inline std::vector<sycl::event>
487
+ memcpy (sycl::queue q, const experimental::memcpy_parameter ¶m) {
488
+ auto to = param.to .pitched ;
489
+ auto from = param.from .pitched ;
490
+ #ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
491
+ if (param.to .image_bindless != nullptr &&
492
+ param.from .image_bindless != nullptr ) {
493
+ throw std::runtime_error (
494
+ " [SYCLcompat] memcpy: Unsupported bindless_image API." );
495
+ // TODO: Need change logic when sycl support image_mem to image_mem copy.
496
+ std::vector<sycl::event> event_list;
497
+ syclcompat::detail::host_buffer buf (param.size .size (), q, event_list);
498
+ to.set_data_ptr (buf.get_ptr ());
499
+ experimental::detail::memcpy (param.from .image_bindless , param.from .pos , to,
500
+ sycl::id<3 >(0 , 0 , 0 ), param.size , q);
501
+ from.set_data_ptr (buf.get_ptr ());
502
+ event_list.push_back (experimental::detail::memcpy (
503
+ from, sycl::id<3 >(0 , 0 , 0 ), param.to .image_bindless , param.to .pos ,
504
+ param.size , q));
505
+ return event_list;
506
+ } else if (param.to .image_bindless != nullptr ) {
507
+ throw std::runtime_error (
508
+ " [SYCLcompat] memcpy: Unsupported bindless_image API." );
509
+ return {experimental::detail::memcpy (from, param.from .pos ,
510
+ param.to .image_bindless , param.to .pos ,
511
+ param.size , q)};
512
+ } else if (param.from .image_bindless != nullptr ) {
513
+ throw std::runtime_error (
514
+ " [SYCLcompat] memcpy: Unsupported bindless_image API." );
515
+ return {experimental::detail::memcpy (param.from .image_bindless ,
516
+ param.from .pos , to, param.to .pos ,
517
+ param.size , q)};
518
+ }
519
+ #endif
520
+ if (param.to .image != nullptr ) {
521
+ throw std::runtime_error (" [SYCLcompat] memcpy: Unsupported image API." );
522
+ to = experimental::detail::to_pitched_data (param.to .image );
523
+ }
524
+ if (param.from .image != nullptr ) {
525
+ throw std::runtime_error (" [SYCLcompat] memcpy: Unsupported image API." );
526
+ from = experimental::detail::to_pitched_data (param.from .image );
527
+ }
528
+ return syclcompat::detail::memcpy (q, to, param.to .pos , from, param.from .pos ,
529
+ param.size );
530
+ }
531
+ } // namespace detail
532
+ } // namespace experimental
533
+
451
534
// / Allocate memory block on the device.
452
535
// / \param num_bytes Number of bytes to allocate.
453
536
// / \param q Queue to execute the allocate task.
@@ -757,6 +840,31 @@ static sycl::event inline fill_async(void *dev_ptr, const T &pattern,
757
840
return detail::fill (q, dev_ptr, pattern, count);
758
841
}
759
842
843
+ namespace experimental {
844
+
845
+ // / [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
846
+ // / The function will return after the copy is completed.
847
+ // /
848
+ // / \param param Memory copy parameters.
849
+ // / \param q Queue to execute the copy task.
850
+ // / \returns no return value.
851
+ static inline void memcpy (const memcpy_parameter ¶m,
852
+ sycl::queue q = get_default_queue()) {
853
+ sycl::event::wait (syclcompat::experimental::detail::memcpy (q, param));
854
+ }
855
+
856
+ // / [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
857
+ // / . The return of the function does NOT guarantee the copy is completed.
858
+ // /
859
+ // / \param param Memory copy parameters.
860
+ // / \param q Queue to execute the copy task.
861
+ // / \returns no return value.
862
+ static inline void memcpy_async (const memcpy_parameter ¶m,
863
+ sycl::queue q = get_default_queue()) {
864
+ syclcompat::experimental::detail::memcpy (q, param);
865
+ }
866
+ } // namespace experimental
867
+
760
868
// / Synchronously sets \p value to the first \p size bytes starting from \p
761
869
// / dev_ptr. The function will return after the memset operation is completed.
762
870
// /
0 commit comments