Skip to content

Commit 1f71136

Browse files
committed
Make image conv memory coalescence example more intuitive
The previous arrangement used a rather devious (32, 1) work-group size, which forced the globalId to be flipped inside the kernel. This works, but it makes it seem as though a sycl::id is row-major while a sycl::buffer is column-major. The exercise is now effectively reversed: we keep an intuitive work-group shape in reference.cpp, but add the (now rogue) globalId flip so that memory access isn't coalesced. The 'solution' for the coalescence exercise is now to remove the globalId flip.
1 parent 7d5824b commit 1f71136

File tree

3 files changed

+4
-5
lines changed

3 files changed

+4
-5
lines changed

Code_Exercises/Exercise_15_Image_Convolution/reference.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ TEST_CASE("image_convolution_naive", "image_convolution_reference") {
6161
auto halo = filter.half_width();
6262

6363
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
64-
auto localRange = sycl::range(32, 1);
64+
auto localRange = sycl::range(1, 32);
6565
auto ndRange = sycl::nd_range(globalRange, localRange);
6666

6767
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -86,6 +86,7 @@ TEST_CASE("image_convolution_naive", "image_convolution_reference") {
8686
cgh.parallel_for<image_convolution>(
8787
ndRange, [=](sycl::nd_item<2> item) {
8888
auto globalId = item.get_global_id();
89+
globalId = sycl::id{globalId[1], globalId[0]};
8990

9091
auto channelsStride = sycl::range(1, channels);
9192
auto haloOffset = sycl::id(halo, halo);

Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ TEST_CASE("image_convolution_coalesced", "coalesced_global_memory_solution") {
6161
auto halo = filter.half_width();
6262

6363
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
64-
auto localRange = sycl::range(32, 1);
64+
auto localRange = sycl::range(1, 32);
6565
auto ndRange = sycl::nd_range(globalRange, localRange);
6666

6767
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -86,7 +86,6 @@ TEST_CASE("image_convolution_coalesced", "coalesced_global_memory_solution") {
8686
cgh.parallel_for<image_convolution>(
8787
ndRange, [=](sycl::nd_item<2> item) {
8888
auto globalId = item.get_global_id();
89-
globalId = sycl::id{globalId[1], globalId[0]};
9089

9190
auto channelsStride = sycl::range(1, channels);
9291
auto haloOffset = sycl::id(halo, halo);

Code_Exercises/Exercise_17_Vectors/solution.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ TEST_CASE("image_convolution_vectorized", "vectors_solution") {
6262
auto halo = filter.half_width();
6363

6464
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
65-
auto localRange = sycl::range(32, 1);
65+
auto localRange = sycl::range(1, 32);
6666
auto ndRange = sycl::nd_range(globalRange, localRange);
6767

6868
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -95,7 +95,6 @@ TEST_CASE("image_convolution_vectorized", "vectors_solution") {
9595
cgh.parallel_for<image_convolution>(
9696
ndRange, [=](sycl::nd_item<2> item) {
9797
auto globalId = item.get_global_id();
98-
globalId = sycl::id{globalId[1], globalId[0]};
9998

10099
auto haloOffset = sycl::id(halo, halo);
101100
auto src = (globalId + haloOffset);

0 commit comments

Comments
 (0)