Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 03f7c6b

Browse files
Rewrite Stencils tests using buffers
1 parent d9bf2c2 commit 03f7c6b

File tree

2 files changed

+44
-42
lines changed

2 files changed

+44
-42
lines changed

SYCL/ESIMD/Stencil.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
// RUN: %clangxx -fsycl %s -o %t.out
1111
// RUN: %HOST_RUN_PLACEHOLDER %t.out
1212
// RUN: %GPU_RUN_PLACEHOLDER %t.out
13-
// Temporarily disabled on Windows
14-
// UNSUPPORTED: windows
1513

1614
#include "esimd_test_utils.hpp"
1715

@@ -24,7 +22,7 @@
2422
// test 8x16 block size
2523
//
2624
#define DIM_SIZE (1 << 13)
27-
#define SQUARE_SZ (DIM_SIZE * DIM_SIZE + 16)
25+
#define SQUARE_SZ (DIM_SIZE * DIM_SIZE)
2826

2927
#define WIDTH 16
3028
#define HEIGHT 16
@@ -99,15 +97,18 @@ int main(void) {
9997
auto ctxt = q.get_context();
10098

10199
// create and init matrices
102-
float *inputMatrix =
103-
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
104-
float *outputMatrix =
105-
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
100+
float *inputMatrix = new float[SQUARE_SZ];
101+
float *outputMatrix = new float[SQUARE_SZ];
106102
InitializeSquareMatrix(inputMatrix, DIM_SIZE, false);
107103
InitializeSquareMatrix(outputMatrix, DIM_SIZE, true);
108104

109105
try {
106+
buffer<float, 1> buf_in(inputMatrix, range<1>(SQUARE_SZ));
107+
buffer<float, 1> buf_out(outputMatrix, range<1>(SQUARE_SZ));
108+
110109
auto e = q.submit([&](handler &cgh) {
110+
auto input = buf_in.get_access<access::mode::read>(cgh);
111+
auto output = buf_out.get_access<access::mode::write>(cgh);
111112
cgh.parallel_for<class Stencil_kernel>(
112113
GlobalRange * LocalRange, [=](item<2> it) SYCL_ESIMD_KERNEL {
113114
using namespace sycl::INTEL::gpu;
@@ -123,23 +124,23 @@ int main(void) {
123124
// the code will interleave data loading and compute
124125
// first, we load enough data for the first 16 pixels
125126
//
126-
unsigned off = (v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH;
127+
unsigned off =
128+
((v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH) * sizeof(float);
127129
#pragma unroll
128130
for (unsigned i = 0; i < 10; i++) {
129-
in.row(i) = block_load<float, 32>(inputMatrix + off);
130-
off += DIM_SIZE;
131+
in.row(i) = block_load<float, 32>(input, off);
132+
off += DIM_SIZE * sizeof(float);
131133
}
132134

133135
unsigned out_off =
134-
(((v_pos * HEIGHT + 5) * DIM_SIZE + (h_pos * WIDTH) + 5)) *
135-
sizeof(float);
136+
((v_pos * HEIGHT + 5) * DIM_SIZE + (h_pos * WIDTH) + 5);
136137
simd<unsigned, WIDTH> elm16(0, 1);
137138

138139
#pragma unroll
139140
for (unsigned i = 0; i < HEIGHT; i++) {
140141

141-
in.row(10 + i) = block_load<float, 32>(inputMatrix + off);
142-
off += DIM_SIZE;
142+
in.row(10 + i) = block_load<float, 32>(input, off);
143+
off += DIM_SIZE * sizeof(float);
143144

144145
simd<float, WIDTH> sum =
145146
in.row(i + 0).select<WIDTH, 1>(5) * -0.02f +
@@ -166,9 +167,9 @@ int main(void) {
166167
// predicate output
167168
simd<ushort, WIDTH> p = (elm16 + h_pos * WIDTH) < DIM_SIZE - 10;
168169

169-
simd<unsigned, WIDTH> elm16_off = elm16 * sizeof(float) + out_off;
170-
scatter<float, WIDTH>(outputMatrix, sum, elm16_off, p);
171-
out_off += DIM_SIZE * sizeof(float);
170+
simd<unsigned, WIDTH> elm16_off = elm16 + out_off;
171+
scatter<float, WIDTH>(output, sum, elm16_off, 0, p);
172+
out_off += DIM_SIZE;
172173

173174
if (v_pos * HEIGHT + 10 + i >= DIM_SIZE - 1)
174175
break;
@@ -178,8 +179,8 @@ int main(void) {
178179
e.wait();
179180
} catch (cl::sycl::exception const &e) {
180181
std::cout << "SYCL exception caught: " << e.what() << '\n';
181-
free(inputMatrix, ctxt);
182-
free(outputMatrix, ctxt);
182+
delete[] inputMatrix;
183+
delete[] outputMatrix;
183184
return e.get_cl_code();
184185
}
185186

@@ -190,7 +191,7 @@ int main(void) {
190191
} else {
191192
std::cout << "FAILED" << std::endl;
192193
}
193-
free(inputMatrix, ctxt);
194-
free(outputMatrix, ctxt);
194+
delete[] inputMatrix;
195+
delete[] outputMatrix;
195196
return 0;
196197
}

SYCL/ESIMD/stencil2.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
// RUN: %clangxx -fsycl %s -o %t.out
1111
// RUN: %HOST_RUN_PLACEHOLDER %t.out
1212
// RUN: %GPU_RUN_PLACEHOLDER %t.out
13-
// Temporarily disabled on Windows
14-
// UNSUPPORTED: windows
1513

1614
#include "esimd_test_utils.hpp"
1715

@@ -24,7 +22,7 @@
2422
// test 8x16 block size
2523
//
2624
#define DIM_SIZE (1 << 13)
27-
#define SQUARE_SZ (DIM_SIZE * DIM_SIZE + 16)
25+
#define SQUARE_SZ (DIM_SIZE * DIM_SIZE)
2826

2927
#define WIDTH 16
3028
#define HEIGHT 16
@@ -101,15 +99,18 @@ int main(void) {
10199
auto ctxt = q.get_context();
102100

103101
// create and init matrices
104-
float *inputMatrix =
105-
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
106-
float *outputMatrix =
107-
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
102+
float *inputMatrix = new float[SQUARE_SZ];
103+
float *outputMatrix = new float[SQUARE_SZ];
108104
InitializeSquareMatrix(inputMatrix, DIM_SIZE, false);
109105
InitializeSquareMatrix(outputMatrix, DIM_SIZE, true);
110106

111107
try {
108+
buffer<float, 1> buf_in(inputMatrix, range<1>(SQUARE_SZ));
109+
buffer<float, 1> buf_out(outputMatrix, range<1>(SQUARE_SZ));
110+
112111
auto e = q.submit([&](handler &cgh) {
112+
auto input = buf_in.get_access<access::mode::read>(cgh);
113+
auto output = buf_out.get_access<access::mode::write>(cgh);
113114
cgh.parallel_for<class Stencil_kernel>(
114115
GlobalRange * LocalRange, [=](item<2> it) SYCL_ESIMD_KERNEL {
115116
using namespace sycl::INTEL::gpu;
@@ -125,23 +126,23 @@ int main(void) {
125126
// the code will interleave data loading and compute
126127
// first, we load enough data for the first 16 pixels
127128
//
128-
unsigned off = (v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH;
129+
unsigned off =
130+
((v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH) * sizeof(float);
129131
#pragma unroll
130132
for (unsigned i = 0; i < 10; i++) {
131-
in.row(i) = block_load<float, 32>(inputMatrix + off);
132-
off += DIM_SIZE;
133+
in.row(i) = block_load<float, 32>(input, off);
134+
off += DIM_SIZE * sizeof(float);
133135
}
134136

135137
unsigned out_off =
136-
(((v_pos * HEIGHT + 5) * DIM_SIZE + (h_pos * WIDTH) + 5)) *
137-
sizeof(float);
138+
((v_pos * HEIGHT + 5) * DIM_SIZE + (h_pos * WIDTH) + 5);
138139
simd<unsigned, WIDTH> elm16(0, 1);
139140

140141
#pragma unroll
141142
for (unsigned i = 0; i < HEIGHT; i++) {
142143

143-
in.row(10 + i) = block_load<float, 32>(inputMatrix + off);
144-
off += DIM_SIZE;
144+
in.row(10 + i) = block_load<float, 32>(input, off);
145+
off += DIM_SIZE * sizeof(float);
145146

146147
simd<float, WIDTH> sum =
147148
vin.select<WIDTH, 1>(GET_IDX(i, 5)) * -0.02f +
@@ -168,9 +169,9 @@ int main(void) {
168169
// predicate output
169170
simd<ushort, WIDTH> p = (elm16 + h_pos * WIDTH) < DIM_SIZE - 10;
170171

171-
simd<unsigned, WIDTH> elm16_off = elm16 * sizeof(float) + out_off;
172-
scatter<float, WIDTH>(outputMatrix, sum, elm16_off, p);
173-
out_off += DIM_SIZE * sizeof(float);
172+
simd<unsigned, WIDTH> elm16_off = elm16 + out_off;
173+
scatter<float, WIDTH>(output, sum, elm16_off, 0, p);
174+
out_off += DIM_SIZE;
174175

175176
if (v_pos * HEIGHT + 10 + i >= DIM_SIZE - 1)
176177
break;
@@ -180,8 +181,8 @@ int main(void) {
180181
e.wait();
181182
} catch (cl::sycl::exception const &e) {
182183
std::cout << "SYCL exception caught: " << e.what() << '\n';
183-
free(inputMatrix, ctxt);
184-
free(outputMatrix, ctxt);
184+
delete[] inputMatrix;
185+
delete[] outputMatrix;
185186
return e.get_cl_code();
186187
}
187188

@@ -192,7 +193,7 @@ int main(void) {
192193
} else {
193194
std::cout << "FAILED" << std::endl;
194195
}
195-
free(inputMatrix, ctxt);
196-
free(outputMatrix, ctxt);
196+
delete[] inputMatrix;
197+
delete[] outputMatrix;
197198
return 0;
198199
}

0 commit comments

Comments
 (0)