23
23
#include < benchmark.h>
24
24
#include < image_conv.h>
25
25
26
-
27
26
class image_convolution ;
28
27
29
28
inline constexpr util::filter_type filterType = util::filter_type::blur;
30
29
inline constexpr int filterWidth = 11 ;
31
30
inline constexpr int halo = filterWidth / 2 ;
32
31
33
32
TEST_CASE (" image_convolution_tiled" , " local_memory_tiling_solution" ) {
34
- const char * inputImageFile =
35
- " ../Images/dogs.png" ;
36
- const char * outputImageFile =
37
- " ../Images/blurred_dogs.png" ;
33
+ const char *inputImageFile = " ../Images/dogs.png" ;
34
+ const char *outputImageFile = " ../Images/blurred_dogs.png" ;
38
35
39
36
auto inputImage = util::read_image (inputImageFile, halo);
40
37
@@ -85,7 +82,7 @@ TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
85
82
86
83
util::benchmark (
87
84
[&]() {
88
- myQueue.submit ([&](sycl::handler& cgh) {
85
+ myQueue.submit ([&](sycl::handler & cgh) {
89
86
auto inputAcc =
90
87
inBufVec.get_access <sycl::access::mode::read>(cgh);
91
88
auto outputAcc =
@@ -101,31 +98,31 @@ TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
101
98
cgh.parallel_for <image_convolution>(
102
99
ndRange, [=](sycl::nd_item<2 > item) {
103
100
auto globalId = item.get_global_id ();
104
- globalId = sycl::id{globalId[ 1 ], globalId[ 0 ]} ;
101
+ auto groupId = item. get_group (). get_group_id () ;
105
102
auto localId = item.get_local_id ();
103
+ auto globalGroupOffset = groupId * localRange;
104
+
105
+ for (auto i = localId[0 ]; i < scratchpadRange[0 ];
106
+ i += localRange[0 ]) {
107
+ for (auto j = localId[1 ]; j < scratchpadRange[0 ];
108
+ j += localRange[1 ]) {
109
+ scratchpad[i][j] =
110
+ inputAcc[globalGroupOffset + sycl::range (i, j)];
111
+ }
112
+ }
106
113
107
- auto haloOffset = sycl::id (halo, halo);
108
- auto src = (globalId + haloOffset);
109
- auto dest = globalId;
110
- auto temp = localId + halo;
111
-
112
- scratchpad[temp] = inputAcc[src];
113
-
114
- sycl::group_barrier (item.get_group ());
114
+ item.barrier ();
115
115
116
116
auto sum = sycl::float4{0 .0f , 0 .0f , 0 .0f , 0 .0f };
117
117
118
118
for (int r = 0 ; r < filterWidth; ++r) {
119
119
for (int c = 0 ; c < filterWidth; ++c) {
120
- auto srcOffset = sycl::id (temp[0 ] + (r - halo),
121
- temp[1 ] + ((c - halo)));
122
- auto filterOffset = sycl::id (r, c);
123
-
124
- sum += scratchpad[srcOffset] * filterAcc[filterOffset];
120
+ auto idx = sycl::range (r, c);
121
+ sum += scratchpad[localId + idx] * filterAcc[idx];
125
122
}
126
123
}
127
124
128
- outputAcc[dest ] = sum;
125
+ outputAcc[globalId ] = sum;
129
126
});
130
127
});
131
128
0 commit comments