Skip to content

Commit 6a53f5b

Browse files
Hugh Delaney authored and AerialMantis committed
Applying same fix to ex 19
1 parent a8db3bb commit 6a53f5b

File tree

1 file changed

+20
-23
lines changed

1 file changed

+20
-23
lines changed

Code_Exercises/Exercise_19_Work_Group_Sizes/solution.cpp

Lines changed: 20 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -23,18 +23,15 @@
2323
#include <benchmark.h>
2424
#include <image_conv.h>
2525

26-
2726
class image_convolution;
2827

2928
inline constexpr util::filter_type filterType = util::filter_type::blur;
3029
inline constexpr int filterWidth = 11;
3130
inline constexpr int halo = filterWidth / 2;
3231

33-
TEST_CASE("image_convolution_work_group_sizes", "work_group_sizes_solution") {
34-
const char* inputImageFile =
35-
"../Images/dogs.png";
36-
const char* outputImageFile =
37-
"../Images/blurred_dogs.png";
32+
TEST_CASE("image_convolution_tiled", "local_memory_tiling_solution") {
33+
constexpr auto inputImageFile = "../Images/dogs.png";
34+
constexpr auto outputImageFile = "../Images/blurred_dogs.png";
3835

3936
auto inputImage = util::read_image(inputImageFile, halo);
4037

@@ -84,8 +81,8 @@ TEST_CASE("image_convolution_work_group_sizes", "work_group_sizes_solution") {
8481
filterRange / sycl::range(1, channels));
8582

8683
util::benchmark(
87-
[&]() {
88-
myQueue.submit([&](sycl::handler& cgh) {
84+
[&] {
85+
myQueue.submit([&](sycl::handler &cgh) {
8986
auto inputAcc =
9087
inBufVec.get_access<sycl::access::mode::read>(cgh);
9188
auto outputAcc =
@@ -101,31 +98,31 @@ TEST_CASE("image_convolution_work_group_sizes", "work_group_sizes_solution") {
10198
cgh.parallel_for<image_convolution>(
10299
ndRange, [=](sycl::nd_item<2> item) {
103100
auto globalId = item.get_global_id();
104-
globalId = sycl::id{globalId[1], globalId[0]};
101+
auto groupId = item.get_group().get_group_id();
105102
auto localId = item.get_local_id();
103+
auto globalGroupOffset = groupId * localRange;
104+
105+
for (auto i = localId[0]; i < scratchpadRange[0];
106+
i += localRange[0]) {
107+
for (auto j = localId[1]; j < scratchpadRange[1];
108+
j += localRange[1]) {
109+
scratchpad[i][j] =
110+
inputAcc[globalGroupOffset + sycl::range(i, j)];
111+
}
112+
}
106113

107-
auto haloOffset = sycl::id(halo, halo);
108-
auto src = (globalId + haloOffset);
109-
auto dest = globalId;
110-
auto temp = localId + halo;
111-
112-
scratchpad[temp] = inputAcc[src];
113-
114-
sycl::group_barrier(item.get_group());
114+
item.barrier();
115115

116116
auto sum = sycl::float4{0.0f, 0.0f, 0.0f, 0.0f};
117117

118118
for (int r = 0; r < filterWidth; ++r) {
119119
for (int c = 0; c < filterWidth; ++c) {
120-
auto srcOffset = sycl::id(temp[0] + (r - halo),
121-
temp[1] + ((c - halo)));
122-
auto filterOffset = sycl::id(r, c);
123-
124-
sum += scratchpad[srcOffset] * filterAcc[filterOffset];
120+
auto idx = sycl::range(r, c);
121+
sum += scratchpad[localId + idx] * filterAcc[idx];
125122
}
126123
}
127124

128-
outputAcc[dest] = sum;
125+
outputAcc[globalId] = sum;
129126
});
130127
});
131128

0 commit comments

Comments
 (0)