Skip to content

Commit 5b2f1d2

Browse files
authored
[SYCL][Bindless] Image Array Sub-Region Copy (#14954)
* Add support for sub-region copies of image arrays. * Initial implementation of tests. UR PR: oneapi-src/unified-runtime#1928
1 parent 404fb8a commit 5b2f1d2

File tree

5 files changed

+332
-19
lines changed

5 files changed

+332
-19
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 24a8299efc59c715a1c2dd180692a5e12a12283a
121-
# Merge: eb63d1a2 2fea679d
120+
# commit 2bbe952669861579ea84fa30f14e1ed27ead0692
121+
# Merge: d357964a 6b353545
122122
# Author: Omar Ahmed <[email protected]>
123-
# Date: Wed Sep 11 10:40:59 2024 +0100
124-
# Merge pull request #2078 from callumfare/callum/fix_device_extensions_fpga
125-
# Add workaround for silently supported OpenCL extensions on Intel FPGA
126-
set(UNIFIED_RUNTIME_TAG 24a8299efc59c715a1c2dd180692a5e12a12283a)
123+
# Date: Thu Sep 12 11:36:11 2024 +0100
124+
# Merge pull request #1928 from isaacault/iault/image_array_copy
125+
# [Bindless][Exp] Image Array Sub-Region Copies
126+
set(UNIFIED_RUNTIME_TAG 2bbe952669861579ea84fa30f14e1ed27ead0692)
127127

128128
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129129
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ address mode `clamp_to_edge` will be applied for all dimensions. If the
744744
performed when sampling along the cube face borders.
745745
====
746746

747-
=== Explicit copies
747+
=== Explicit copies [[explicit_copies]]
748748

749749
```cpp
750750
namespace sycl {
@@ -1398,9 +1398,14 @@ As with allocation, the descriptor must be populated appropriately, i.e.
13981398

13991399
=== Copying image array data [[copying_image_array_data]]
14001400

1401-
When copying to or from image arrays, the user should copy to/from the entire
1402-
array of images in one call to `ext_oneapi_copy` by passing the image arrays'
1403-
`image_mem_handle`.
1401+
When copying to or from image arrays, the user should utilize `ext_oneapi_copy`
1402+
and pass the image arrays' `image_mem_handle`, and any applicable sub-region
1403+
copy parameters, as outlined in <<explicit_copies>>.
1404+
1405+
In order to copy to specific layers of an image array, the offset and extent
1406+
parameters involved in sub-region copies must be populated such that the 3rd
1407+
dimension of the ranges represent the arrays' layer(s) being copied, regardless
1408+
of whether the copy is performed on a 1D or 2D image array.
14041409

14051410
=== Reading an image array
14061411

@@ -2888,4 +2893,5 @@ These features still need to be handled:
28882893
`map_external_linear_memory`.
28892894
|6 |2024-08-05 | - Collated all changes since revision 5.
28902895
- Bumped SYCL_EXT_ONEAPI_BINDLESS_IMAGES to number 6.
2896+
|6.1|2024-09-09| - Update for image-array sub-region copy support.
28912897
|======================

sycl/source/handler.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include "ur_api.h"
109
#include "sycl/detail/helpers.hpp"
10+
#include "ur_api.h"
1111
#include <algorithm>
1212

1313
#include <detail/config.hpp>
@@ -1046,10 +1046,15 @@ void handler::ext_oneapi_copy(
10461046
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
10471047
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
10481048
: UrDesc.type;
1049+
1050+
// Array size is depth extent.
1051+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
10491052
} else {
10501053
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
10511054
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
10521055
: UR_MEM_TYPE_IMAGE1D);
1056+
1057+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
10531058
}
10541059

10551060
ur_image_format_t UrFormat;
@@ -1061,7 +1066,6 @@ void handler::ext_oneapi_copy(
10611066

10621067
impl->MSrcOffset = {0, 0, 0};
10631068
impl->MDestOffset = {0, 0, 0};
1064-
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
10651069
impl->MSrcImageDesc = UrDesc;
10661070
impl->MDstImageDesc = UrDesc;
10671071
impl->MSrcImageFormat = UrFormat;
@@ -1136,7 +1140,7 @@ void handler::ext_oneapi_copy(
11361140
sycl_ext_oneapi_bindless_images>();
11371141
Desc.verify();
11381142

1139-
MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
1143+
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
11401144
MDstPtr = Dest;
11411145

11421146
ur_image_desc_t UrDesc = {};
@@ -1156,10 +1160,15 @@ void handler::ext_oneapi_copy(
11561160
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
11571161
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
11581162
: UrDesc.type;
1163+
1164+
// Array size is depth extent.
1165+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
11591166
} else {
11601167
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
11611168
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
11621169
: UR_MEM_TYPE_IMAGE1D);
1170+
1171+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
11631172
}
11641173

11651174
ur_image_format_t UrFormat;
@@ -1171,7 +1180,6 @@ void handler::ext_oneapi_copy(
11711180

11721181
impl->MSrcOffset = {0, 0, 0};
11731182
impl->MDestOffset = {0, 0, 0};
1174-
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
11751183
impl->MSrcImageDesc = UrDesc;
11761184
impl->MDstImageDesc = UrDesc;
11771185
impl->MSrcImageFormat = UrFormat;
@@ -1189,8 +1197,8 @@ void handler::ext_oneapi_copy(
11891197
sycl_ext_oneapi_bindless_images>();
11901198
ImageDesc.verify();
11911199

1192-
MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
1193-
MDstPtr = reinterpret_cast<void*>(Dest.raw_handle);
1200+
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
1201+
MDstPtr = reinterpret_cast<void *>(Dest.raw_handle);
11941202

11951203
ur_image_desc_t UrDesc = {};
11961204
UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
@@ -1208,11 +1216,17 @@ void handler::ext_oneapi_copy(
12081216
ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
12091217
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
12101218
: UrDesc.type;
1219+
1220+
// Array size is depth extent.
1221+
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height,
1222+
ImageDesc.array_size};
12111223
} else {
12121224
UrDesc.type = ImageDesc.depth > 0
12131225
? UR_MEM_TYPE_IMAGE3D
12141226
: (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D
12151227
: UR_MEM_TYPE_IMAGE1D);
1228+
1229+
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
12161230
}
12171231

12181232
ur_image_format_t UrFormat;
@@ -1224,7 +1238,6 @@ void handler::ext_oneapi_copy(
12241238

12251239
impl->MSrcOffset = {0, 0, 0};
12261240
impl->MDestOffset = {0, 0, 0};
1227-
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
12281241
impl->MSrcImageDesc = UrDesc;
12291242
impl->MDstImageDesc = UrDesc;
12301243
impl->MSrcImageFormat = UrFormat;
@@ -1244,7 +1257,7 @@ void handler::ext_oneapi_copy(
12441257
sycl_ext_oneapi_bindless_images>();
12451258
SrcImgDesc.verify();
12461259

1247-
MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
1260+
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
12481261
MDstPtr = Dest;
12491262

12501263
ur_image_desc_t UrDesc = {};
@@ -1320,10 +1333,15 @@ void handler::ext_oneapi_copy(
13201333
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
13211334
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
13221335
: UrDesc.type;
1336+
1337+
// Array size is depth extent.
1338+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
13231339
} else {
13241340
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
13251341
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
13261342
: UR_MEM_TYPE_IMAGE1D);
1343+
1344+
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
13271345
}
13281346

13291347
ur_image_format_t UrFormat;
@@ -1335,7 +1353,6 @@ void handler::ext_oneapi_copy(
13351353

13361354
impl->MSrcOffset = {0, 0, 0};
13371355
impl->MDestOffset = {0, 0, 0};
1338-
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
13391356
impl->MSrcImageDesc = UrDesc;
13401357
impl->MDstImageDesc = UrDesc;
13411358
impl->MSrcImageFormat = UrFormat;
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// REQUIRES: cuda
2+
3+
// RUN: %{build} -o %t.out
4+
// RUN: %{run} %t.out
5+
6+
#include <iostream>
7+
#include <sycl/detail/core.hpp>
8+
9+
#include <sycl/ext/oneapi/bindless_images.hpp>
10+
11+
// Uncomment to print additional test information
12+
// #define VERBOSE_PRINT
13+
14+
class image_addition;
15+
16+
int main() {
17+
18+
sycl::device dev;
19+
sycl::queue q(dev);
20+
auto ctxt = q.get_context();
21+
22+
// declare image data
23+
size_t width = 4;
24+
size_t layers = 2;
25+
size_t N = width * layers;
26+
std::vector<float> out(N);
27+
std::vector<float> expected(N);
28+
std::vector<float> dataIn1(N);
29+
std::vector<float> dataIn2(N);
30+
for (int i = 0; i < width; i++) {
31+
for (int j = 0; j < layers; j++) {
32+
expected[j + ((layers)*i)] = (j + (layers)*i) * 3;
33+
dataIn1[j + ((layers)*i)] = (j + (layers)*i);
34+
dataIn2[j + ((layers)*i)] = (j + (layers)*i) * 2;
35+
}
36+
}
37+
38+
// Image descriptor - can use the same for both images
39+
sycl::ext::oneapi::experimental::image_descriptor desc(
40+
{width}, 1, sycl::image_channel_type::fp32,
41+
sycl::ext::oneapi::experimental::image_type::array, 1, layers);
42+
43+
try {
44+
// Extension: allocate memory on device and create the handle
45+
sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q);
46+
sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q);
47+
sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q);
48+
49+
// Extension: create the image and return the handle
50+
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 =
51+
sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q);
52+
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 =
53+
sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q);
54+
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 =
55+
sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q);
56+
57+
// The subregion size for the copies.
58+
sycl::range copyExtent = {width / 2, 1, layers / 2};
59+
// The extent of data provided on the host (vector).
60+
sycl::range srcExtent = {width, 1, layers};
61+
62+
// the 4 subregion offsets used for the copies.
63+
std::vector<sycl::range<3>> offsets{{0, 0, 0},
64+
{width / 2, 0, 0},
65+
{0, 0, layers / 2},
66+
{width / 2, 0, layers / 2}};
67+
68+
for (auto offset : offsets) {
69+
// Extension: Copy to image array subregion.
70+
q.ext_oneapi_copy(dataIn1.data(), offset, srcExtent, imgMem0.get_handle(),
71+
offset, desc, copyExtent);
72+
// Extension: Copy to image array subregion.
73+
q.ext_oneapi_copy(dataIn2.data(), offset, srcExtent, imgMem1.get_handle(),
74+
offset, desc, copyExtent);
75+
}
76+
q.wait_and_throw();
77+
78+
q.submit([&](sycl::handler &cgh) {
79+
cgh.parallel_for<image_addition>(
80+
sycl::nd_range<2>{{width, layers}, {width, layers}},
81+
[=](sycl::nd_item<2> it) {
82+
size_t dim0 = it.get_local_id(0);
83+
size_t dim1 = it.get_local_id(1);
84+
float sum = 0;
85+
// Extension: fetch image data from handle
86+
float px1 =
87+
sycl::ext::oneapi::experimental::fetch_image_array<float>(
88+
imgHandle1, int(dim0), dim1);
89+
float px2 =
90+
sycl::ext::oneapi::experimental::fetch_image_array<float>(
91+
imgHandle2, int(dim0), dim1);
92+
93+
sum = px1 + px2;
94+
95+
// Extension: write to image with handle
96+
sycl::ext::oneapi::experimental::write_image_array<float>(
97+
imgHandle3, int(dim0), dim1, sum);
98+
});
99+
});
100+
q.wait_and_throw();
101+
102+
// Extension: copy data from device to host (four subregions/quadrants)
103+
for (auto offset : offsets) {
104+
q.ext_oneapi_copy(imgMem2.get_handle(), offset, desc, out.data(), offset,
105+
srcExtent, copyExtent);
106+
}
107+
q.wait_and_throw();
108+
109+
// Extension: cleanup
110+
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q);
111+
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q);
112+
} catch (sycl::exception e) {
113+
std::cerr << "SYCL exception caught! : " << e.what() << "\n";
114+
return 1;
115+
} catch (...) {
116+
std::cerr << "Unknown exception caught!\n";
117+
return 2;
118+
}
119+
120+
// collect and validate output
121+
bool validated = true;
122+
for (int i = 0; i < N; i++) {
123+
bool mismatch = false;
124+
if (out[i] != expected[i]) {
125+
mismatch = true;
126+
validated = false;
127+
}
128+
129+
if (mismatch) {
130+
#ifdef VERBOSE_PRINT
131+
std::cout << "Result mismatch! Expected: " << expected[i]
132+
<< ", Actual: " << out[i] << std::endl;
133+
#else
134+
break;
135+
#endif
136+
}
137+
}
138+
if (validated) {
139+
std::cout << "Test passed!" << std::endl;
140+
return 0;
141+
}
142+
143+
std::cout << "Test failed!" << std::endl;
144+
return 3;
145+
}

0 commit comments

Comments
 (0)