Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 03e4e9b

Browse files
[ESIMD] Update usage of deprecated block_load/store APIs (#273)
1 parent 78d1351 commit 03e4e9b

32 files changed

+150
-91
lines changed

SYCL/ESIMD/BitonicSortK.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ ESIMD_INLINE simd<ty, size> cmk_read(AccTy buf, uint32_t offset) {
4343
offset *= sizeof(ty);
4444
#pragma unroll
4545
for (uint32_t i = 0; i < size; i += 32) {
46-
v.template select<32, 1>(i) = block_load<ty, 32, AccTy>(buf, offset);
46+
simd<ty, 32> data;
47+
data.copy_from(buf, offset);
48+
v.template select<32, 1>(i) = data;
4749
offset += 32 * sizeof(ty);
4850
}
4951
return v;
@@ -54,7 +56,8 @@ ESIMD_INLINE void cmk_write(AccTy buf, uint32_t offset, simd<ty, size> v) {
5456
offset *= sizeof(ty);
5557
#pragma unroll
5658
for (uint32_t i = 0; i < size; i += 32) {
57-
block_store<ty, 32, AccTy>(buf, offset, v.template select<32, 1>(i));
59+
simd<ty, 32> vals = v.template select<32, 1>(i);
60+
vals.copy_to(buf, offset);
5861
offset += 32 * sizeof(ty);
5962
}
6063
}

SYCL/ESIMD/BitonicSortKv2.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ ESIMD_INLINE simd<ty, size> cmk_read(ty *buf, uint32_t offset) {
4343
simd<ty, size> v;
4444
#pragma unroll
4545
for (uint32_t i = 0; i < size; i += 32) {
46-
v.template select<32, 1>(i) = block_load<ty, 32>(buf + offset + i);
46+
simd<ty, 32> data;
47+
data.copy_from(buf + offset + i);
48+
v.template select<32, 1>(i) = data;
4749
}
4850
return v;
4951
}
@@ -52,7 +54,8 @@ template <typename ty, uint32_t size>
5254
ESIMD_INLINE void cmk_write(ty *buf, uint32_t offset, simd<ty, size> v) {
5355
#pragma unroll
5456
for (uint32_t i = 0; i < size; i += 32) {
55-
block_store<ty, 32>(buf + offset + i, v.template select<32, 1>(i));
57+
simd<ty, 32> vals = v.template select<32, 1>(i);
58+
vals.copy_to(buf + offset + i);
5659
}
5760
}
5861

SYCL/ESIMD/Prefix_Local_sum1.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,18 @@ void cmk_sum_tuple_count(unsigned int *buf, unsigned int h_pos) {
7070
simd<unsigned, 32 * TUPLE_SZ> S, T;
7171
#pragma unroll
7272
for (int i = 0; i < TUPLE_SZ; i++) {
73-
S.select<32, 1>(i * 32) = block_load<unsigned, 32>(buf + offset + i * 32);
73+
simd<unsigned, 32> data;
74+
data.copy_from(buf + offset + i * 32);
75+
S.select<32, 1>(i * 32) = data;
7476
}
7577

7678
#pragma unroll
7779
for (int i = 1; i < PREFIX_ENTRIES / 32; i++) {
7880
#pragma unroll
7981
for (int j = 0; j < TUPLE_SZ; j++) {
80-
T.select<32, 1>(j * 32) =
81-
block_load<unsigned, 32>(buf + offset + i * 32 * TUPLE_SZ + j * 32);
82+
simd<unsigned, 32> data;
83+
data.copy_from(buf + offset + i * 32 * TUPLE_SZ + j * 32);
84+
T.select<32, 1>(j * 32) = data;
8285
}
8386
S += T;
8487
}
@@ -109,7 +112,7 @@ void cmk_sum_tuple_count(unsigned int *buf, unsigned int h_pos) {
109112
// This is a ULT test variant of PrefixSum kernel with different implementation
110113
// to increase test coverage of different usage cases and help isolate bugs.
111114
// Difference from PrefixSum kernel:
112-
// - Use block_load<>() to read in data
115+
// - Use copy_from<>() to read in data
113116
// - Use scatter<>() to write output
114117
//************************************
115118
int main(int argc, char *argv[]) {

SYCL/ESIMD/Stencil.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ int main(int argc, char *argv[]) {
123123
unsigned off = (v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH;
124124
#pragma unroll
125125
for (unsigned i = 0; i < 10; i++) {
126-
in.row(i) = block_load<float, 32>(inputMatrix + off);
126+
simd<float, 32> data;
127+
data.copy_from(inputMatrix + off);
128+
in.row(i) = data;
127129
off += DIM_SIZE;
128130
}
129131

@@ -134,8 +136,9 @@ int main(int argc, char *argv[]) {
134136

135137
#pragma unroll
136138
for (unsigned i = 0; i < HEIGHT; i++) {
137-
138-
in.row(10 + i) = block_load<float, 32>(inputMatrix + off);
139+
simd<float, 32> data;
140+
data.copy_from(inputMatrix + off);
141+
in.row(10 + i) = data;
139142
off += DIM_SIZE;
140143

141144
simd<float, WIDTH> sum =

SYCL/ESIMD/accessor.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,12 @@ int main() {
5353
using namespace sycl::ext::intel::experimental::esimd;
5454
unsigned int offset = 0;
5555
for (int k = 0; k < VL / 16; k++) {
56-
simd<Ty, 16> var = block_load<Ty, 16>(acc0, offset);
56+
simd<Ty, 16> var;
57+
var.copy_from(acc0, offset);
5758
var += VAL;
58-
block_store(acc0, offset, var);
59-
block_store(acc1, offset, var + 1);
59+
var.copy_to(acc0, offset);
60+
var += 1;
61+
var.copy_to(acc1, offset);
6062
offset += 64;
6163
}
6264
});

SYCL/ESIMD/dp4a.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,17 @@ int main(void) {
6060
using namespace sycl::ext::intel::experimental::esimd;
6161

6262
simd<DTYPE, SIZE> src0(0);
63-
src0 = block_load<DTYPE, SIZE>(S0);
63+
src0.copy_from(S0);
6464

6565
simd<DTYPE, SIZE> src1(0);
66-
src1 = block_load<DTYPE, SIZE>(S1);
66+
src1.copy_from(S1);
6767

6868
simd<DTYPE, SIZE> src2(0);
69-
src2 = block_load<DTYPE, SIZE>(S2);
69+
src2.copy_from(S2);
7070

7171
auto res =
7272
esimd_dp4a<DTYPE, DTYPE, DTYPE, DTYPE, SIZE>(src0, src1, src2);
73-
block_store<DTYPE, SIZE>(RES, res);
73+
res.copy_to(RES);
7474
});
7575
});
7676
e.wait();

SYCL/ESIMD/ext_math.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,11 @@ struct DeviceFunc {
8080

8181
void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
8282
unsigned int Offset = I * VL * sizeof(float);
83-
simd<float, VL> Vx = block_load<float, VL>(In, Offset);
83+
simd<float, VL> Vx;
84+
Vx.copy_from(In, Offset);
8485
DeviceMathFunc<VL, Op> DevF{};
8586
Vx = DevF(Vx);
86-
block_store(Out, Offset, Vx);
87+
Vx.copy_to(Out, Offset);
8788
};
8889
};
8990

SYCL/ESIMD/fp_args_size/Inputs/fp_args_size_common.hpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,22 @@ int main(void) {
6565
simd<a_data_t, SIZE> va(0);
6666
simd<b_data_t, SIZE> vb(0);
6767
for (int j = 0; j < ROWS; j++) {
68-
va.select<VL, 1>(j * VL) = block_load<a_data_t, VL>(A + j * VL);
69-
vb.select<VL, 1>(j * VL) = block_load<b_data_t, VL>(B + j * VL);
68+
simd<a_data_t, VL> a_data;
69+
a_data.copy_from(A + j * VL);
70+
va.select<VL, 1>(j * VL) = a_data;
71+
simd<b_data_t, VL> b_data;
72+
b_data.copy_from(B + j * VL);
73+
vb.select<VL, 1>(j * VL) = b_data;
7074
}
7175

7276
auto foo = &add<simd<a_data_t, SIZE>, simd<b_data_t, SIZE>,
7377
simd<c_data_t, SIZE>>;
7478
auto vc = foo(va, vb);
7579

76-
for (int j = 0; j < ROWS; j++)
77-
block_store<c_data_t, VL>(C + j * VL, vc.select<VL, 1>(j * VL));
80+
for (int j = 0; j < ROWS; j++) {
81+
simd<c_data_t, VL> vals = vc.select<VL, 1>(j * VL);
82+
vals.copy_to(C + j * VL);
83+
}
7884
});
7985
});
8086

SYCL/ESIMD/histogram.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,10 @@ int main(int argc, char *argv[]) {
203203
bins, offset, src, 1);
204204
offset += 8 * sizeof(unsigned int);
205205
#else
206-
auto vals = block_load<unsigned int, 8>(bins + i);
207-
vals = vals + src;
208-
block_store<unsigned int, 8>(bins + i, vals);
206+
simd<unsigned int, 8> vals;
207+
vals.copy_from(bins + i);
208+
vals = vals + src;
209+
vals.copy_to(bins + i);
209210
#endif
210211
}
211212
});

SYCL/ESIMD/histogram_256_slm.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
4343
auto start_off = (linear_id * BLOCK_WIDTH * NUM_BLOCKS);
4444
for (int y = 0; y < NUM_BLOCKS; y++) {
4545
auto start_addr = ((unsigned int *)input_ptr) + start_off;
46-
auto data = block_load<uint, 32>(start_addr);
46+
simd<uint, 32> data;
47+
data.copy_from(start_addr);
4748
auto in = data.format<uchar>();
4849

4950
#pragma unroll

SYCL/ESIMD/histogram_256_slm_spec.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
4444
auto start_off = (linear_id * BLOCK_WIDTH * num_blocks);
4545
for (int y = 0; y < num_blocks; y++) {
4646
auto start_addr = ((unsigned int *)input_ptr) + start_off;
47-
auto data = block_load<uint, 32>(start_addr);
47+
simd<uint, 32> data;
48+
data.copy_from(start_addr);
4849
auto in = data.format<uchar>();
4950

5051
#pragma unroll

SYCL/ESIMD/histogram_2d.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,10 @@ int main(int argc, char *argv[]) {
195195
bins, offset, src, 1);
196196
offset += 8 * sizeof(unsigned int);
197197
#else
198-
auto vals = block_load<unsigned int, 8>(bins + i);
198+
simd<unsigned int, 8> vals;
199+
vals.copy_from(bins + i);
199200
vals = vals + src;
200-
block_store<unsigned int, 8>(bins + i, vals);
201+
vals.copy_to(bins + i);
201202
#endif
202203
}
203204
});

SYCL/ESIMD/histogram_raw_send.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,10 @@ int main(int argc, char *argv[]) {
220220
bins, offset, src, 1);
221221
offset += 8 * sizeof(unsigned int);
222222
#else
223-
auto vals = block_load<unsigned int, 8>(bins + i);
223+
simd<unsigned int, 8> vals;
224+
vals.copy_from(bins + i);
224225
vals = vals + src;
225-
block_store<unsigned int, 8>(bins + i, vals);
226+
vals.copy_to(bins + i);
226227
#endif
227228
}
228229
});

SYCL/ESIMD/kmeans/kmeans.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,9 @@ int main(int argc, char *argv[]) {
243243

244244
#pragma unroll
245245
for (int i = 0; i < NUM_CENTROIDS_ALLOCATED / SIMD_SIZE; i++) {
246-
centroidsXYXY.row(i) =
247-
block_load<float, 2 * SIMD_SIZE>(kcentroids4[i].xyn);
246+
simd<float, 2 * SIMD_SIZE> data;
247+
data.copy_from(kcentroids4[i].xyn);
248+
centroidsXYXY.row(i) = data;
248249
}
249250

250251
simd<float, NUM_CENTROIDS_ALLOCATED> accumxsum(0);
@@ -269,8 +270,7 @@ int main(int argc, char *argv[]) {
269270
auto pointsXY = points.format<float, 2, SIMD_SIZE>();
270271
simd<int, SIMD_SIZE> cluster(0);
271272

272-
points =
273-
block_load<float, 2 * SIMD_SIZE>(kpoints4[index + i].xyn);
273+
points.copy_from(kpoints4[index + i].xyn);
274274

275275
simd<float, SIMD_SIZE> dx =
276276
pointsXY.row(0) - centroidsXY.row(0)[0];
@@ -309,7 +309,7 @@ int main(int argc, char *argv[]) {
309309
min_dist.merge(dist, (dist < min_dist));
310310
}
311311

312-
block_store<int, SIMD_SIZE>(kpoints4[index + i].cluster, cluster);
312+
cluster.copy_to(kpoints4[index + i].cluster);
313313

314314
#pragma unroll
315315
for (int k = 0; k < SIMD_SIZE; k++) {
@@ -352,14 +352,13 @@ int main(int argc, char *argv[]) {
352352
unsigned int offset = 0;
353353
for (int i = 0; i < (NUM_POINTS / POINTS_PER_THREAD) / SIMD_SIZE;
354354
i++) {
355-
simd<float, SIMD_SIZE> t = block_load<float, SIMD_SIZE>(
356-
kaccum4[it.get_global_id(0)].x_sum + offset);
355+
simd<float, SIMD_SIZE> t;
356+
t.copy_from(kaccum4[it.get_global_id(0)].x_sum + offset);
357357
xsum += t;
358-
t = block_load<float, SIMD_SIZE>(
359-
kaccum4[it.get_global_id(0)].y_sum + offset);
358+
t.copy_from(kaccum4[it.get_global_id(0)].y_sum + offset);
360359
ysum += t;
361-
simd<int, SIMD_SIZE> n = block_load<int, SIMD_SIZE>(
362-
kaccum4[it.get_global_id(0)].num_points + offset);
360+
simd<int, SIMD_SIZE> n;
361+
n.copy_from(kaccum4[it.get_global_id(0)].num_points + offset);
363362
npoints += n;
364363
offset += SIMD_SIZE;
365364
}

SYCL/ESIMD/matrix_transpose.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ ESIMD_INLINE simd<T, N * N> read(AccessorTy buf, int MZ, int col, int row) {
115115
uint32_t offset = (row * MZ + col) * sizeof(T);
116116
#pragma unroll
117117
for (int i = 0; i < N; ++i) {
118-
res.template select<N, 1>(i * N) = block_load<T, N>(buf, offset);
118+
simd<T, N> data;
119+
data.copy_from(buf, offset);
120+
res.template select<N, 1>(i * N) = data;
119121
offset += MZ * sizeof(T);
120122
}
121123
return res;
@@ -128,7 +130,8 @@ ESIMD_INLINE void write(AccessorTy buf, int MZ, int col, int row,
128130
uint32_t offset = (row * MZ + col) * sizeof(T);
129131
#pragma unroll
130132
for (int i = 0; i < N; ++i) {
131-
block_store<T, N>(buf, offset, val.template select<N, 1>(i * N));
133+
simd<T, N> vals = val.template select<N, 1>(i * N);
134+
vals.copy_to(buf, offset);
132135
offset += MZ * sizeof(T);
133136
}
134137
}

SYCL/ESIMD/matrix_transpose_glb.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ ESIMD_NOINLINE void read(int *buf, int MZ, int col, int row, int GrfIdx) {
117117
buf += row * MZ + col;
118118
#pragma unroll
119119
for (int i = 0; i < N; ++i) {
120-
res.template select<N, 1>(i * N) = block_load<int, N>(buf);
120+
simd<int, N> data;
121+
data.copy_from(buf);
122+
res.template select<N, 1>(i * N) = data;
121123
buf += MZ;
122124
}
123125
}
@@ -129,7 +131,8 @@ ESIMD_NOINLINE void write(int *buf, int MZ, int col, int row, int GrfIdx) {
129131
buf += row * MZ + col;
130132
#pragma unroll
131133
for (int i = 0; i < N; ++i) {
132-
block_store<int, N>(buf, val.template select<N, 1>(i * N));
134+
simd<int, N> val2 = val.template select<N, 1>(i * N);
135+
val2.copy_to(buf);
133136
buf += MZ;
134137
}
135138
}

SYCL/ESIMD/matrix_transpose_usm.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ ESIMD_INLINE simd<T, N * N> read(T *buf, int MZ, int col, int row) {
116116
buf += row * MZ + col;
117117
#pragma unroll
118118
for (int i = 0; i < N; ++i) {
119-
res.template select<N, 1>(i * N) = block_load<T, N>(buf);
119+
simd<T, N> data;
120+
data.copy_from(buf);
121+
res.template select<N, 1>(i * N) = data;
120122
buf += MZ;
121123
}
122124
return res;
@@ -128,7 +130,8 @@ ESIMD_INLINE void write(T *buf, int MZ, int col, int row, simd<T, N * N> val) {
128130
buf += row * MZ + col;
129131
#pragma unroll
130132
for (int i = 0; i < N; ++i) {
131-
block_store<T, N>(buf, val.template select<N, 1>(i * N));
133+
simd<T, N> vals = val.template select<N, 1>(i * N);
134+
vals.copy_to(buf);
132135
buf += MZ;
133136
}
134137
}

SYCL/ESIMD/noinline_args_size/Inputs/noinline_args_size_common.hpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,21 @@ int main(void) {
6464
simd<a_data_t, SIZE> va(0);
6565
simd<b_data_t, SIZE> vb(0);
6666
for (int j = 0; j < ROWS; j++) {
67-
va.select<VL, 1>(j * VL) = block_load<a_data_t, VL>(A + j * VL);
68-
vb.select<VL, 1>(j * VL) = block_load<b_data_t, VL>(B + j * VL);
67+
simd<a_data_t, VL> a_data;
68+
a_data.copy_from(A + j * VL);
69+
va.select<VL, 1>(j * VL) = a_data;
70+
simd<b_data_t, VL> b_data;
71+
b_data.copy_from(B + j * VL);
72+
vb.select<VL, 1>(j * VL) = b_data;
6973
}
7074

7175
auto vc = add<simd<a_data_t, SIZE>, simd<b_data_t, SIZE>,
7276
simd<c_data_t, SIZE>>(va, vb);
7377

74-
for (int j = 0; j < ROWS; j++)
75-
block_store<c_data_t, VL>(C + j * VL, vc.select<VL, 1>(j * VL));
78+
for (int j = 0; j < ROWS; j++) {
79+
simd<c_data_t, VL> vals = vc.select<VL, 1>(j * VL);
80+
vals.copy_to(C + j * VL);
81+
}
7682
});
7783
});
7884

SYCL/ESIMD/private_memory/Inputs/pm_common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ template <int CASE_NUM> int test() {
143143
for (int j = 0; j < VL; j++)
144144
val.select<1, 1>(j) += o[j];
145145

146-
block_store<int, VL>(output, val);
146+
val.copy_to(output);
147147
});
148148
});
149149
e.wait();

0 commit comments

Comments
 (0)