Skip to content

Commit ff30897

Browse files
alexeyvoronov-intelromanovvlad
authored andcommitted
[SYCL] Enable handler copy method for N to M dimensions.
Enabled the handler copy method to use it for cases with mismatched dimensions of accessors. Signed-off-by: Alexey Voronov <[email protected]>
1 parent 8acf36e commit ff30897

File tree

2 files changed

+234
-9
lines changed

2 files changed

+234
-9
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,39 @@ class handler {
10591059
MAccStorage.push_back(std::move(AccImpl));
10601060
}
10611061

1062-
// copy memory pointed by accessor to the memory pointed by another accessor
1062+
static id<1> getDelinearizedIndex(const range<1> Range, const size_t Index) {
1063+
return {Index};
1064+
}
1065+
1066+
static id<2> getDelinearizedIndex(const range<2> Range, const size_t Index) {
1067+
size_t x = Index / Range[1];
1068+
size_t y = Index % Range[1];
1069+
return {x, y};
1070+
}
1071+
1072+
static id<3> getDelinearizedIndex(const range<3> Range, const size_t Index) {
1073+
size_t x = Index / (Range[1] * Range[2]);
1074+
size_t y = (Index / Range[2]) % Range[1];
1075+
size_t z = Index % Range[2];
1076+
return {x, y, z};
1077+
}
1078+
1079+
// Checks whether it is possible to copy the source shape to the destination
1080+
// shape(the shapes are described by the accessor ranges) by using
1081+
// copying by regions of memory and not copying element by element
1082+
// Shapes can be 1, 2 or 3 dimensional rectangles.
1083+
template <int Dims_Src, int Dims_Dst>
1084+
static bool IsCopyingRectRegionAvailable(const range<Dims_Src> Src,
1085+
const range<Dims_Dst> Dst) {
1086+
if (Dims_Src > Dims_Dst)
1087+
return false;
1088+
for (size_t I = 0; I < Dims_Src; ++I)
1089+
if (Src[I] > Dst[I])
1090+
return false;
1091+
return true;
1092+
}
1093+
1094+
// copy memory pointed by accessor to the memory pointed by another accessor
10631095
template <
10641096
typename T_Src, int Dims_Src, access::mode AccessMode_Src,
10651097
access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
@@ -1076,21 +1108,32 @@ class handler {
10761108
"Invalid source accessor target for the copy method.");
10771109
static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
10781110
"Invalid destination accessor target for the copy method.");
1079-
#ifndef __SYCL_DEVICE_ONLY__
1080-
if (MIsHost) {
1081-
range<Dims_Src> Range = Dst.get_range();
1111+
// TODO replace to get_size() when it will provide correct values.
1112+
assert(
1113+
(Dst.get_range().size() * sizeof(T_Dst) >=
1114+
Src.get_range().size() * sizeof(T_Src)) &&
1115+
"dest must have at least as many bytes as the range accessed by src.");
1116+
if (MIsHost ||
1117+
!IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) {
1118+
range<Dims_Src> CopyRange = Src.get_range();
1119+
size_t Range = 1;
1120+
for (size_t I = 0; I < Dims_Src; ++I)
1121+
Range *= CopyRange[I];
1122+
range<1> LinearizedRange(Range);
10821123
parallel_for< class __copyAcc2Acc< T_Src, Dims_Src, AccessMode_Src,
10831124
AccessTarget_Src, T_Dst, Dims_Dst,
10841125
AccessMode_Dst, AccessTarget_Dst,
10851126
IsPlaceholder_Src,
10861127
IsPlaceholder_Dst>>
1087-
(Range, [=](id<Dims_Src> Index) {
1088-
Dst[Index] = Src[Index];
1128+
(LinearizedRange, [=](id<1> Id) {
1129+
size_t Index = Id[0];
1130+
id<Dims_Src> SrcIndex = getDelinearizedIndex(Src.get_range(), Index);
1131+
id<Dims_Dst> DstIndex = getDelinearizedIndex(Dst.get_range(), Index);
1132+
Dst[DstIndex] = Src[SrcIndex];
10891133
});
10901134

10911135
return;
10921136
}
1093-
#endif
10941137
MCGType = detail::CG::COPY_ACC_TO_ACC;
10951138

10961139
detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src;

sycl/test/basic_tests/handler/handler_mem_op.cpp

Lines changed: 184 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <cassert>
1616
#include <iostream>
17+
#include <numeric>
1718

1819
using namespace cl::sycl;
1920

@@ -22,8 +23,10 @@ template <typename T> struct point {
2223
point(T x, T y) : x(x), y(y) {}
2324
point(T v) : x(v), y(v) {}
2425
point() : x(0), y(0) {}
25-
bool operator==(const T &rhs) { return rhs == x && rhs == y; }
26-
bool operator==(const point<T> &rhs) { return rhs.x == x && rhs.y == y; }
26+
bool operator==(const T &rhs) const { return rhs == x && rhs == y; }
27+
bool operator==(const point<T> &rhs) const {
28+
return rhs.x == x && rhs.y == y;
29+
}
2730
T x;
2831
T y;
2932
};
@@ -38,6 +41,12 @@ template <typename T> void test_copy_acc_acc();
3841
template <typename T> void test_update_host();
3942
template <typename T> void test_2D_copy_acc_acc();
4043
template <typename T> void test_3D_copy_acc_acc();
44+
template <typename T> void test_1D2D_copy_acc_acc();
45+
template <typename T> void test_1D3D_copy_acc_acc();
46+
template <typename T> void test_2D1D_copy_acc_acc();
47+
template <typename T> void test_2D3D_copy_acc_acc();
48+
template <typename T> void test_3D1D_copy_acc_acc();
49+
template <typename T> void test_3D2D_copy_acc_acc();
4150

4251
int main() {
4352
// handler.fill
@@ -126,6 +135,59 @@ int main() {
126135
test_3D_copy_acc_acc<point<float>>();
127136
}
128137

138+
// handler.copy(acc, acc) 1D to 2D
139+
{
140+
test_1D2D_copy_acc_acc<int>();
141+
test_1D2D_copy_acc_acc<int>();
142+
test_1D2D_copy_acc_acc<point<int>>();
143+
test_1D2D_copy_acc_acc<point<int>>();
144+
test_1D2D_copy_acc_acc<point<float>>();
145+
}
146+
147+
// handler.copy(acc, acc) 1D to 3D
148+
{
149+
test_1D3D_copy_acc_acc<int>();
150+
test_1D3D_copy_acc_acc<int>();
151+
test_1D3D_copy_acc_acc<point<int>>();
152+
test_1D3D_copy_acc_acc<point<int>>();
153+
test_1D3D_copy_acc_acc<point<float>>();
154+
}
155+
156+
// handler.copy(acc, acc) 2D to 1D
157+
{
158+
test_2D1D_copy_acc_acc<int>();
159+
test_2D1D_copy_acc_acc<int>();
160+
test_2D1D_copy_acc_acc<point<int>>();
161+
test_2D1D_copy_acc_acc<point<int>>();
162+
test_2D1D_copy_acc_acc<point<float>>();
163+
}
164+
165+
// handler.copy(acc, acc) 2D to 3D
166+
{
167+
test_2D3D_copy_acc_acc<int>();
168+
test_2D3D_copy_acc_acc<int>();
169+
test_2D3D_copy_acc_acc<point<int>>();
170+
test_2D3D_copy_acc_acc<point<int>>();
171+
test_2D3D_copy_acc_acc<point<float>>();
172+
}
173+
174+
// handler.copy(acc, acc) 3D to 1D
175+
{
176+
test_3D1D_copy_acc_acc<int>();
177+
test_3D1D_copy_acc_acc<int>();
178+
test_3D1D_copy_acc_acc<point<int>>();
179+
test_3D1D_copy_acc_acc<point<int>>();
180+
test_3D1D_copy_acc_acc<point<float>>();
181+
}
182+
183+
// handler.copy(acc, acc) 3D to 2D
184+
{
185+
test_3D2D_copy_acc_acc<int>();
186+
test_3D2D_copy_acc_acc<int>();
187+
test_3D2D_copy_acc_acc<point<int>>();
188+
test_3D2D_copy_acc_acc<point<int>>();
189+
test_3D2D_copy_acc_acc<point<float>>();
190+
}
129191
std::cout << "finish" << std::endl;
130192
return 0;
131193
}
@@ -365,3 +427,123 @@ template <typename T> void test_3D_copy_acc_acc() {
365427
}
366428
}
367429
}
430+
431+
template <typename T> void test_1D2D_copy_acc_acc() {
432+
const size_t Size = 20;
433+
std::vector<T> Data(Size);
434+
std::iota(Data.begin(), Data.end(), 0);
435+
std::vector<T> Values(Size, T{});
436+
{
437+
buffer<T, 1> BufferFrom(&Data[0], range<1>(Size));
438+
buffer<T, 2> BufferTo(&Values[0], range<2>(Size / 2, 2));
439+
queue Queue;
440+
Queue.submit([&](handler &Cgh) {
441+
accessor<T, 1, access::mode::read, access::target::global_buffer>
442+
AccessorFrom(BufferFrom, Cgh, range<1>(Size));
443+
accessor<T, 2, access::mode::write, access::target::global_buffer>
444+
AccessorTo(BufferTo, Cgh, range<2>(Size / 2, 2));
445+
Cgh.copy(AccessorFrom, AccessorTo);
446+
});
447+
}
448+
assert(Data == Values);
449+
}
450+
451+
template <typename T> void test_1D3D_copy_acc_acc() {
452+
const size_t Size = 20;
453+
std::vector<T> Data(Size);
454+
std::iota(Data.begin(), Data.end(), 0);
455+
std::vector<T> Values(Size, T{});
456+
{
457+
buffer<T, 1> BufferFrom(&Data[0], range<1>(Size));
458+
buffer<T, 3> BufferTo(&Values[0], range<3>(Size / 4, 2, 2));
459+
queue Queue;
460+
Queue.submit([&](handler &Cgh) {
461+
accessor<T, 1, access::mode::read, access::target::global_buffer>
462+
AccessorFrom(BufferFrom, Cgh, range<1>(Size));
463+
accessor<T, 3, access::mode::write, access::target::global_buffer>
464+
AccessorTo(BufferTo, Cgh, range<3>(Size / 4, 2, 2));
465+
Cgh.copy(AccessorFrom, AccessorTo);
466+
});
467+
}
468+
assert(Data == Values);
469+
}
470+
471+
template <typename T> void test_2D1D_copy_acc_acc() {
472+
const size_t Size = 20;
473+
std::vector<T> Data(Size);
474+
std::iota(Data.begin(), Data.end(), 0);
475+
std::vector<T> Values(Size, T{});
476+
{
477+
buffer<T, 2> BufferFrom(&Data[0], range<2>(Size / 2, 2));
478+
buffer<T, 1> BufferTo(&Values[0], range<1>(Size));
479+
queue Queue;
480+
Queue.submit([&](handler &Cgh) {
481+
accessor<T, 2, access::mode::read, access::target::global_buffer>
482+
AccessorFrom(BufferFrom, Cgh, range<2>(Size / 2, 2));
483+
accessor<T, 1, access::mode::write, access::target::global_buffer>
484+
AccessorTo(BufferTo, Cgh, range<1>(Size));
485+
Cgh.copy(AccessorFrom, AccessorTo);
486+
});
487+
}
488+
assert(Data == Values);
489+
}
490+
491+
template <typename T> void test_2D3D_copy_acc_acc() {
492+
const size_t Size = 20;
493+
std::vector<T> Data(Size);
494+
std::iota(Data.begin(), Data.end(), 0);
495+
std::vector<T> Values(Size, T{});
496+
{
497+
buffer<T, 2> BufferFrom(&Data[0], range<2>(Size / 2, 2));
498+
buffer<T, 3> BufferTo(&Values[0], range<3>(Size / 4, 2, 2));
499+
queue Queue;
500+
Queue.submit([&](handler &Cgh) {
501+
accessor<T, 2, access::mode::read, access::target::global_buffer>
502+
AccessorFrom(BufferFrom, Cgh, range<2>(Size / 2, 2));
503+
accessor<T, 3, access::mode::write, access::target::global_buffer>
504+
AccessorTo(BufferTo, Cgh, range<3>(Size / 4, 2, 2));
505+
Cgh.copy(AccessorFrom, AccessorTo);
506+
});
507+
}
508+
assert(Data == Values);
509+
}
510+
511+
template <typename T> void test_3D1D_copy_acc_acc() {
512+
const size_t Size = 20;
513+
std::vector<T> Data(Size);
514+
std::iota(Data.begin(), Data.end(), 0);
515+
std::vector<T> Values(Size, T{});
516+
{
517+
buffer<T, 3> BufferFrom(&Data[0], range<3>(Size / 4, 2, 2));
518+
buffer<T, 1> BufferTo(&Values[0], range<1>(Size));
519+
queue Queue;
520+
Queue.submit([&](handler &Cgh) {
521+
accessor<T, 3, access::mode::read, access::target::global_buffer>
522+
AccessorFrom(BufferFrom, Cgh, range<3>(Size / 4, 2, 2));
523+
accessor<T, 1, access::mode::write, access::target::global_buffer>
524+
AccessorTo(BufferTo, Cgh, range<1>(Size));
525+
Cgh.copy(AccessorFrom, AccessorTo);
526+
});
527+
}
528+
assert(Data == Values);
529+
}
530+
531+
template <typename T> void test_3D2D_copy_acc_acc() {
532+
const size_t Size = 20;
533+
std::vector<T> Data(Size);
534+
std::iota(Data.begin(), Data.end(), 0);
535+
std::vector<T> Values(Size, T{});
536+
{
537+
buffer<T, 3> BufferFrom(&Data[0], range<3>(Size / 4, 2, 2));
538+
buffer<T, 2> BufferTo(&Values[0], range<2>(Size / 2, 2));
539+
queue Queue;
540+
Queue.submit([&](handler &Cgh) {
541+
accessor<T, 3, access::mode::read, access::target::global_buffer>
542+
AccessorFrom(BufferFrom, Cgh, range<3>(Size / 4, 2, 2));
543+
accessor<T, 2, access::mode::write, access::target::global_buffer>
544+
AccessorTo(BufferTo, Cgh, range<2>(Size / 2, 2));
545+
Cgh.copy(AccessorFrom, AccessorTo);
546+
});
547+
}
548+
assert(Data == Values);
549+
}

0 commit comments

Comments
 (0)