Skip to content

Commit 91aa17c

Browse files
authored
Merge pull request #160 from codeplaysoftware/imageconv-perf-fix
Make image conv memory coalescence example more intuitive
2 parents ebb0e85 + 1f71136 commit 91aa17c

File tree

3 files changed: 4 additions (+4) and 5 deletions (-5)

Code_Exercises/Exercise_15_Image_Convolution/reference.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ TEST_CASE("image_convolution_naive", "image_convolution_reference") {
6161
auto halo = filter.half_width();
6262

6363
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
64-
auto localRange = sycl::range(32, 1);
64+
auto localRange = sycl::range(1, 32);
6565
auto ndRange = sycl::nd_range(globalRange, localRange);
6666

6767
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -84,6 +84,7 @@ TEST_CASE("image_convolution_naive", "image_convolution_reference") {
8484
cgh.parallel_for<image_convolution>(
8585
ndRange, [=](sycl::nd_item<2> item) {
8686
auto globalId = item.get_global_id();
87+
globalId = sycl::id{globalId[1], globalId[0]};
8788

8889
auto channelsStride = sycl::range(1, channels);
8990
auto haloOffset = sycl::id(halo, halo);

Code_Exercises/Exercise_16_Coalesced_Global_Memory/solution.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ TEST_CASE("image_convolution_coalesced", "coalesced_global_memory_solution") {
6161
auto halo = filter.half_width();
6262

6363
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
64-
auto localRange = sycl::range(32, 1);
64+
auto localRange = sycl::range(1, 32);
6565
auto ndRange = sycl::nd_range(globalRange, localRange);
6666

6767
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -84,7 +84,6 @@ TEST_CASE("image_convolution_coalesced", "coalesced_global_memory_solution") {
8484
cgh.parallel_for<image_convolution>(
8585
ndRange, [=](sycl::nd_item<2> item) {
8686
auto globalId = item.get_global_id();
87-
globalId = sycl::id{globalId[1], globalId[0]};
8887

8988
auto channelsStride = sycl::range(1, channels);
9089
auto haloOffset = sycl::id(halo, halo);

Code_Exercises/Exercise_17_Vectors/solution.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ TEST_CASE("image_convolution_vectorized", "vectors_solution") {
6262
auto halo = filter.half_width();
6363

6464
auto globalRange = sycl::range(inputImgWidth, inputImgHeight);
65-
auto localRange = sycl::range(32, 1);
65+
auto localRange = sycl::range(1, 32);
6666
auto ndRange = sycl::nd_range(globalRange, localRange);
6767

6868
auto inBufRange = (inputImgWidth + (halo * 2)) * sycl::range(1, channels);
@@ -92,7 +92,6 @@ TEST_CASE("image_convolution_vectorized", "vectors_solution") {
9292
cgh.parallel_for<image_convolution>(
9393
ndRange, [=](sycl::nd_item<2> item) {
9494
auto globalId = item.get_global_id();
95-
globalId = sycl::id{globalId[1], globalId[0]};
9695

9796
auto haloOffset = sycl::id(halo, halo);
9897
auto src = (globalId + haloOffset);

0 commit comments

Comments (0)