Skip to content

Commit 904aba0

Browse files
Hugh Delaney authored and AerialMantis committed
Fix exercise 18. Was not working before
1 parent d06c7ba commit 904aba0

File tree

1 file changed

+18
-21
lines changed

1 file changed

+18
-21
lines changed

Code_Exercises/Exercise_18_Local_Memory_Tiling/solution.cpp

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -23,18 +23,15 @@
2323
#include <benchmark.h>
2424
#include <image_conv.h>
2525

26-
2726
class image_convolution;
2827

2928
inline constexpr util::filter_type filterType = util::filter_type::blur;
3029
inline constexpr int filterWidth = 11;
3130
inline constexpr int halo = filterWidth / 2;
3231

3332
TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
34-
const char* inputImageFile =
35-
"../Images/dogs.png";
36-
const char* outputImageFile =
37-
"../Images/blurred_dogs.png";
33+
const char *inputImageFile = "../Images/dogs.png";
34+
const char *outputImageFile = "../Images/blurred_dogs.png";
3835

3936
auto inputImage = util::read_image(inputImageFile, halo);
4037

@@ -85,7 +82,7 @@ TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
8582

8683
util::benchmark(
8784
[&]() {
88-
myQueue.submit([&](sycl::handler& cgh) {
85+
myQueue.submit([&](sycl::handler &cgh) {
8986
auto inputAcc =
9087
inBufVec.get_access<sycl::access::mode::read>(cgh);
9188
auto outputAcc =
@@ -101,31 +98,31 @@ TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
10198
cgh.parallel_for<image_convolution>(
10299
ndRange, [=](sycl::nd_item<2> item) {
103100
auto globalId = item.get_global_id();
104-
globalId = sycl::id{globalId[1], globalId[0]};
101+
auto groupId = item.get_group().get_group_id();
105102
auto localId = item.get_local_id();
103+
auto globalGroupOffset = groupId * localRange;
104+
105+
for (auto i = localId[0]; i < scratchpadRange[0];
106+
i += localRange[0]) {
107+
for (auto j = localId[1]; j < scratchpadRange[0];
108+
j += localRange[1]) {
109+
scratchpad[i][j] =
110+
inputAcc[globalGroupOffset + sycl::range(i, j)];
111+
}
112+
}
106113

107-
auto haloOffset = sycl::id(halo, halo);
108-
auto src = (globalId + haloOffset);
109-
auto dest = globalId;
110-
auto temp = localId + halo;
111-
112-
scratchpad[temp] = inputAcc[src];
113-
114-
sycl::group_barrier(item.get_group());
114+
item.barrier();
115115

116116
auto sum = sycl::float4{0.0f, 0.0f, 0.0f, 0.0f};
117117

118118
for (int r = 0; r < filterWidth; ++r) {
119119
for (int c = 0; c < filterWidth; ++c) {
120-
auto srcOffset = sycl::id(temp[0] + (r - halo),
121-
temp[1] + ((c - halo)));
122-
auto filterOffset = sycl::id(r, c);
123-
124-
sum += scratchpad[srcOffset] * filterAcc[filterOffset];
120+
auto idx = sycl::range(r, c);
121+
sum += scratchpad[localId + idx] * filterAcc[idx];
125122
}
126123
}
127124

128-
outputAcc[dest] = sum;
125+
outputAcc[globalId] = sum;
129126
});
130127
});
131128

0 commit comments

Comments
 (0)