Skip to content

Commit ed2b572

Browse files
committed
[SYCL][Bindless] Add image_mem_handle to image_mem_handle devices copies.
1 parent 6501156 commit ed2b572

File tree

7 files changed

+248
-10
lines changed

7 files changed

+248
-10
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,12 @@ public:
749749
size_t DeviceRowPitch,
750750
sycl::range<3> HostExtent,
751751
sycl::range<3> CopyExtent);
752+
753+
// Simple device to device copy
754+
void ext_oneapi_copy(
755+
ext::oneapi::experimental::image_mem_handle Src,
756+
ext::oneapi::experimental::image_mem_handle Dest,
757+
const ext::oneapi::experimental::image_descriptor &ImageDesc);
752758
};
753759

754760
class queue {
@@ -874,14 +880,32 @@ public:
874880
size_t DeviceRowPitch,
875881
sycl::range<3> HostExtent,
876882
sycl::range<3> CopyExtent);
883+
884+
// Simple device to device copy
885+
event ext_oneapi_copy(
886+
ext::oneapi::experimental::image_mem_handle Src,
887+
ext::oneapi::experimental::image_mem_handle Dest,
888+
const ext::oneapi::experimental::image_descriptor &ImageDesc);
889+
event ext_oneapi_copy(
890+
ext::oneapi::experimental::image_mem_handle Src,
891+
ext::oneapi::experimental::image_mem_handle Dest,
892+
const ext::oneapi::experimental::image_descriptor &ImageDesc,
893+
event DepEvent);
894+
event ext_oneapi_copy(
895+
ext::oneapi::experimental::image_mem_handle Src,
896+
ext::oneapi::experimental::image_mem_handle Dest,
897+
const ext::oneapi::experimental::image_descriptor &ImageDesc,
898+
const std::vector<event> &DepEvents);
877899
};
878900
}
879901
```
880902

881903
To enable the copying of images an `ext_oneapi_copy` function is proposed as a
882904
method of the queue and handler. It can be used to copy image memory, whether
883905
allocated through USM or using an `image_mem_handle`, from host to
884-
device, or device to host. For the `ext_oneapi_copy` variants that do not take
906+
device, or device to host. Device to device copies are currently supported only
907+
through `image_mem_handle` allocations.
908+
For the `ext_oneapi_copy` variants that do not take
885909
offsets and extents, the image descriptor passed to the `ext_oneapi_copy` API
886910
is used to determine the pixel size, dimensions, and extent in memory of the
887911
image to copy. If performing sub-region copy, the size of the memory region is
@@ -2060,4 +2084,6 @@ These features still need to be handled:
20602084
wording around what types are allowed to be read or written.
20612085
- Allow `read_image` and `read_mipmap` to return a
20622086
user-defined type.
2087+
|5.1|2024-01-17| - Added overload for `ext_oneapi_copy` enabling device to device
2088+
copies using `image_mem_handle`.
20632089
|======================

sycl/include/sycl/handler.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3227,6 +3227,18 @@ class __SYCL_EXPORT handler {
32273227
const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
32283228
size_t DeviceRowPitch);
32293229

3230+
/// Copies data from device to device memory, where \p Src and \p Dest
3231+
/// are opaque image memory handles.
3232+
/// An exception is thrown if either \p Src or \p Dest is incomplete
3233+
///
3234+
/// \param Src is an opaque image memory handle to the source memory.
3235+
/// \param Dest is an opaque image memory handle to the destination memory.
3236+
/// \param ImageDesc is the source image descriptor
3237+
void
3238+
ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src,
3239+
ext::oneapi::experimental::image_mem_handle Dest,
3240+
const ext::oneapi::experimental::image_descriptor &ImageDesc);
3241+
32303242
/// Copies data from one memory region to another, where \p Src and \p Dest
32313243
/// are USM pointers. Allows for a sub-region copy, where \p SrcOffset ,
32323244
/// \p DestOffset , and \p Extent are used to determine the sub-region.

sycl/include/sycl/queue.hpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,6 +1917,74 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
19171917
CodeLoc);
19181918
}
19191919

1920+
/// Copies data from device to device memory, where \p Src and \p Dest
1921+
/// are opaque image memory handles.
1922+
/// An exception is thrown if either \p Src or \p Dest is incomplete
1923+
///
1924+
/// \param Src is an opaque image memory handle to the source memory.
1925+
/// \param Dest is an opaque image memory handle to the destination memory.
1926+
/// \param ImageDesc is the source image descriptor
1927+
/// \param DepEvent is an events that specifies the kernel dependency.
1928+
/// \return an event representing the copy operation.
1929+
event ext_oneapi_copy(
1930+
ext::oneapi::experimental::image_mem_handle Src,
1931+
ext::oneapi::experimental::image_mem_handle Dest,
1932+
const ext::oneapi::experimental::image_descriptor &ImageDesc,
1933+
event DepEvent,
1934+
const detail::code_location &CodeLoc = detail::code_location::current()) {
1935+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1936+
return submit(
1937+
[&](handler &CGH) {
1938+
CGH.depends_on(DepEvent);
1939+
CGH.ext_oneapi_copy(Src, Dest, ImageDesc);
1940+
},
1941+
CodeLoc);
1942+
}
1943+
1944+
/// Copies data from device to device memory, where \p Src and \p Dest
1945+
/// are opaque image memory handles.
1946+
/// An exception is thrown if either \p Src or \p Dest is incomplete
1947+
///
1948+
/// \param Src is an opaque image memory handle to the source memory.
1949+
/// \param Dest is an opaque image memory handle to the destination memory.
1950+
/// \param ImageDesc is the source image descriptor
1951+
/// \param DepEvents is a vector of events that specifies the kernel
1952+
/// dependencies.
1953+
/// \return an event representing the copy operation.
1954+
event ext_oneapi_copy(
1955+
ext::oneapi::experimental::image_mem_handle Src,
1956+
ext::oneapi::experimental::image_mem_handle Dest,
1957+
const ext::oneapi::experimental::image_descriptor &ImageDesc,
1958+
const std::vector<event> &DepEvents,
1959+
const detail::code_location &CodeLoc = detail::code_location::current()) {
1960+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1961+
return submit(
1962+
[&](handler &CGH) {
1963+
CGH.depends_on(DepEvents);
1964+
CGH.ext_oneapi_copy(Src, Dest, ImageDesc);
1965+
},
1966+
CodeLoc);
1967+
}
1968+
1969+
/// Copies data from device to device memory, where \p Src and \p Dest
1970+
/// are opaque image memory handles.
1971+
/// An exception is thrown if either \p Src or \p Dest is incomplete
1972+
///
1973+
/// \param Src is an opaque image memory handle to the source memory.
1974+
/// \param Dest is an opaque image memory handle to the destination memory.
1975+
/// \param ImageDesc is the source image descriptor
1976+
/// \return an event representing the copy operation.
1977+
event ext_oneapi_copy(
1978+
ext::oneapi::experimental::image_mem_handle Src,
1979+
ext::oneapi::experimental::image_mem_handle Dest,
1980+
const ext::oneapi::experimental::image_descriptor &ImageDesc,
1981+
const detail::code_location &CodeLoc = detail::code_location::current()) {
1982+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1983+
return submit(
1984+
[&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, ImageDesc); },
1985+
CodeLoc);
1986+
}
1987+
19201988
/// Copies data from one memory region to another, where \p Src and \p Dest
19211989
/// are USM pointers. Allows for a sub-region copy, where \p SrcOffset ,
19221990
/// \p DestOffset , and \p Extent are used to determine the sub-region.

sycl/plugins/unified_runtime/CMakeLists.txt

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,9 @@ endif()
5656
if(SYCL_PI_UR_USE_FETCH_CONTENT)
5757
include(FetchContent)
5858

59-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
60-
# commit 79c28d0f0713f58358d5080653d95803fd131749
61-
# Merge: 25e0b603 45d76b78
62-
# Author: aarongreig <[email protected]>
63-
# Date: Fri Jan 12 16:14:44 2024 +0000
64-
# Merge pull request #1186 from hdelan/device-global-hip
65-
# [HIP] Add support for global variable read write
66-
set(UNIFIED_RUNTIME_TAG 79c28d0f0713f58358d5080653d95803fd131749)
59+
set(UNIFIED_RUNTIME_REPO "https://github.com/cppchedy/unified-runtime.git")
60+
set(UNIFIED_RUNTIME_TAG 0942022ba947f1832056ffa9e317dc1384c382e0)
61+
6762

6863
if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
6964
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

sycl/source/detail/memory_manager.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1755,7 +1755,9 @@ void MemoryManager::copy_image_bindless(
17551755
assert((Flags == (sycl::detail::pi::PiImageCopyFlags)
17561756
ext::oneapi::experimental::image_copy_flags::HtoD ||
17571757
Flags == (sycl::detail::pi::PiImageCopyFlags)
1758-
ext::oneapi::experimental::image_copy_flags::DtoH) &&
1758+
ext::oneapi::experimental::image_copy_flags::DtoH ||
1759+
Flags == (sycl::detail::pi::PiImageCopyFlags)
1760+
ext::oneapi::experimental::image_copy_flags::DtoD) &&
17591761
"Invalid flags passed to copy_image_bindless.");
17601762
if (!Dst || !Src)
17611763
throw sycl::exception(

sycl/source/handler.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,42 @@ void handler::ext_oneapi_copy(
10581058
setType(detail::CG::CopyImage);
10591059
}
10601060

1061+
void handler::ext_oneapi_copy(
1062+
ext::oneapi::experimental::image_mem_handle Src,
1063+
ext::oneapi::experimental::image_mem_handle Dest,
1064+
const ext::oneapi::experimental::image_descriptor &ImageDesc) {
1065+
throwIfGraphAssociated<
1066+
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1067+
sycl_ext_oneapi_bindless_images>();
1068+
MSrcPtr = Src.raw_handle;
1069+
MDstPtr = Dest.raw_handle;
1070+
1071+
sycl::detail::pi::PiMemImageDesc PiDesc = {};
1072+
PiDesc.image_width = ImageDesc.width;
1073+
PiDesc.image_height = ImageDesc.height;
1074+
PiDesc.image_depth = ImageDesc.depth;
1075+
PiDesc.image_type =
1076+
ImageDesc.depth > 0
1077+
? PI_MEM_TYPE_IMAGE3D
1078+
: (ImageDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
1079+
1080+
sycl::detail::pi::PiMemImageFormat PiFormat;
1081+
PiFormat.image_channel_data_type =
1082+
sycl::_V1::detail::convertChannelType(ImageDesc.channel_type);
1083+
PiFormat.image_channel_order =
1084+
sycl::_V1::detail::convertChannelOrder(ImageDesc.channel_order);
1085+
1086+
MImpl->MSrcOffset = {0, 0, 0};
1087+
MImpl->MDestOffset = {0, 0, 0};
1088+
MImpl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
1089+
MImpl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
1090+
MImpl->MImageDesc = PiDesc;
1091+
MImpl->MImageFormat = PiFormat;
1092+
MImpl->MImageCopyFlags =
1093+
sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE;
1094+
setType(detail::CG::CopyImage);
1095+
}
1096+
10611097
void handler::ext_oneapi_copy(
10621098
ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset,
10631099
const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// REQUIRES: linux
2+
// REQUIRES: cuda
3+
4+
// RUN: %{build} -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
#include <iostream>
8+
#include <sycl/sycl.hpp>
9+
10+
// Uncomment to print additional test information
11+
#define VERBOSE_PRINT
12+
13+
namespace syclexp = sycl::ext::oneapi::experimental;
14+
15+
void copy_image_mem_handle_to_image_mem_handle(
16+
syclexp::image_descriptor &desc, const std::vector<float> &testData,
17+
sycl::device dev, sycl::queue q, std::vector<float> &out) {
18+
syclexp::image_mem imgMemSrc(desc, dev, q.get_context());
19+
syclexp::image_mem imgMemDst(desc, dev, q.get_context());
20+
21+
q.ext_oneapi_copy((void *)testData.data(), imgMemSrc.get_handle(), desc);
22+
q.wait_and_throw();
23+
24+
q.ext_oneapi_copy(imgMemSrc.get_handle(), imgMemDst.get_handle(), desc);
25+
q.wait_and_throw();
26+
27+
q.ext_oneapi_copy(imgMemDst.get_handle(), (void *)out.data(), desc);
28+
q.wait_and_throw();
29+
}
30+
31+
bool check_test(const std::vector<float> &out,
32+
const std::vector<float> &expected) {
33+
assert(out.size() == expected.size());
34+
bool validated = true;
35+
for (int i = 0; i < out.size(); i++) {
36+
bool mismatch = false;
37+
if (out[i] != expected[i]) {
38+
mismatch = true;
39+
validated = false;
40+
}
41+
42+
if (mismatch) {
43+
#ifdef VERBOSE_PRINT
44+
std::cout << "Result mismatch! Expected: " << expected[i]
45+
<< ", Actual: " << out[i] << std::endl;
46+
#else
47+
break;
48+
#endif
49+
}
50+
}
51+
return validated;
52+
}
53+
54+
template <sycl::image_channel_order channelOrder,
55+
sycl::image_channel_type channelType, int dim>
56+
bool run_copy_test_with(sycl::device &dev, sycl::queue &q,
57+
sycl::range<dim> dims) {
58+
std::vector<float> dataSequence(dims.size());
59+
std::vector<float> out(dims.size());
60+
61+
std::vector<float> expected(dims.size());
62+
63+
std::iota(dataSequence.begin(), dataSequence.end(), 0);
64+
std::iota(expected.begin(), expected.end(), 0);
65+
66+
syclexp::image_descriptor desc(dims, channelOrder, channelType);
67+
68+
copy_image_mem_handle_to_image_mem_handle(desc, dataSequence, dev, q, out);
69+
70+
return check_test(out, expected);
71+
}
72+
73+
int main() {
74+
75+
sycl::device dev;
76+
sycl::queue q(dev);
77+
auto ctxt = q.get_context();
78+
79+
bool validated =
80+
run_copy_test_with<sycl::image_channel_order::r,
81+
sycl::image_channel_type::fp32, 1>(dev, q, {4});
82+
83+
validated &=
84+
run_copy_test_with<sycl::image_channel_order::r,
85+
sycl::image_channel_type::fp32, 2>(dev, q, {4, 4});
86+
87+
validated &=
88+
run_copy_test_with<sycl::image_channel_order::r,
89+
sycl::image_channel_type::fp32, 3>(dev, q, {4, 4, 4});
90+
91+
if (!validated) {
92+
std::cout << "Tests failed";
93+
return 1;
94+
}
95+
96+
std::cout << "Tests passed";
97+
98+
return 0;
99+
}

0 commit comments

Comments
 (0)