Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[ESIMD] Update usage of deprecated block_load/store APIs #273

Merged
merged 4 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions SYCL/ESIMD/BitonicSortK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ ESIMD_INLINE simd<ty, size> cmk_read(AccTy buf, uint32_t offset) {
offset *= sizeof(ty);
#pragma unroll
for (uint32_t i = 0; i < size; i += 32) {
v.template select<32, 1>(i) = block_load<ty, 32, AccTy>(buf, offset);
simd<ty, 32> data;
data.copy_from(buf, offset);
v.template select<32, 1>(i) = data;
offset += 32 * sizeof(ty);
}
return v;
Expand All @@ -54,7 +56,8 @@ ESIMD_INLINE void cmk_write(AccTy buf, uint32_t offset, simd<ty, size> v) {
offset *= sizeof(ty);
#pragma unroll
for (uint32_t i = 0; i < size; i += 32) {
block_store<ty, 32, AccTy>(buf, offset, v.template select<32, 1>(i));
simd<ty, 32> vals = v.template select<32, 1>(i);
vals.copy_to(buf, offset);
offset += 32 * sizeof(ty);
}
}
Expand Down
7 changes: 5 additions & 2 deletions SYCL/ESIMD/BitonicSortKv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ ESIMD_INLINE simd<ty, size> cmk_read(ty *buf, uint32_t offset) {
simd<ty, size> v;
#pragma unroll
for (uint32_t i = 0; i < size; i += 32) {
v.template select<32, 1>(i) = block_load<ty, 32>(buf + offset + i);
simd<ty, 32> data;
data.copy_from(buf + offset + i);
v.template select<32, 1>(i) = data;
}
return v;
}
Expand All @@ -52,7 +54,8 @@ template <typename ty, uint32_t size>
ESIMD_INLINE void cmk_write(ty *buf, uint32_t offset, simd<ty, size> v) {
#pragma unroll
for (uint32_t i = 0; i < size; i += 32) {
block_store<ty, 32>(buf + offset + i, v.template select<32, 1>(i));
simd<ty, 32> vals = v.template select<32, 1>(i);
vals.copy_to(buf + offset + i);
}
}

Expand Down
11 changes: 7 additions & 4 deletions SYCL/ESIMD/Prefix_Local_sum1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,18 @@ void cmk_sum_tuple_count(unsigned int *buf, unsigned int h_pos) {
simd<unsigned, 32 * TUPLE_SZ> S, T;
#pragma unroll
for (int i = 0; i < TUPLE_SZ; i++) {
S.select<32, 1>(i * 32) = block_load<unsigned, 32>(buf + offset + i * 32);
simd<unsigned, 32> data;
data.copy_from(buf + offset + i * 32);
S.select<32, 1>(i * 32) = data;
}

#pragma unroll
for (int i = 1; i < PREFIX_ENTRIES / 32; i++) {
#pragma unroll
for (int j = 0; j < TUPLE_SZ; j++) {
T.select<32, 1>(j * 32) =
block_load<unsigned, 32>(buf + offset + i * 32 * TUPLE_SZ + j * 32);
simd<unsigned, 32> data;
data.copy_from(buf + offset + i * 32 * TUPLE_SZ + j * 32);
T.select<32, 1>(j * 32) = data;
}
S += T;
}
Expand Down Expand Up @@ -109,7 +112,7 @@ void cmk_sum_tuple_count(unsigned int *buf, unsigned int h_pos) {
// This is a ULT test variant of PrefixSum kernel with different implementation
// to increase test coverage of different usage cases and help isolate bugs.
// Difference from PrefixSum kernel:
// - Use block_load<>() to read in data
// - Use copy_from<>() to read in data
// - Use scatter<>() to write output
//************************************
int main(int argc, char *argv[]) {
Expand Down
9 changes: 6 additions & 3 deletions SYCL/ESIMD/Stencil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ int main(int argc, char *argv[]) {
unsigned off = (v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH;
#pragma unroll
for (unsigned i = 0; i < 10; i++) {
in.row(i) = block_load<float, 32>(inputMatrix + off);
simd<float, 32> data;
data.copy_from(inputMatrix + off);
in.row(i) = data;
off += DIM_SIZE;
}

Expand All @@ -134,8 +136,9 @@ int main(int argc, char *argv[]) {

#pragma unroll
for (unsigned i = 0; i < HEIGHT; i++) {

in.row(10 + i) = block_load<float, 32>(inputMatrix + off);
simd<float, 32> data;
data.copy_from(inputMatrix + off);
in.row(10 + i) = data;
off += DIM_SIZE;

simd<float, WIDTH> sum =
Expand Down
8 changes: 5 additions & 3 deletions SYCL/ESIMD/accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ int main() {
using namespace sycl::ext::intel::experimental::esimd;
unsigned int offset = 0;
for (int k = 0; k < VL / 16; k++) {
simd<Ty, 16> var = block_load<Ty, 16>(acc0, offset);
simd<Ty, 16> var;
var.copy_from(acc0, offset);
var += VAL;
block_store(acc0, offset, var);
block_store(acc1, offset, var + 1);
var.copy_to(acc0, offset);
var += 1;
var.copy_to(acc1, offset);
offset += 64;
}
});
Expand Down
8 changes: 4 additions & 4 deletions SYCL/ESIMD/dp4a.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,17 @@ int main(void) {
using namespace sycl::ext::intel::experimental::esimd;

simd<DTYPE, SIZE> src0(0);
src0 = block_load<DTYPE, SIZE>(S0);
src0.copy_from(S0);

simd<DTYPE, SIZE> src1(0);
src1 = block_load<DTYPE, SIZE>(S1);
src1.copy_from(S1);

simd<DTYPE, SIZE> src2(0);
src2 = block_load<DTYPE, SIZE>(S2);
src2.copy_from(S2);

auto res =
esimd_dp4a<DTYPE, DTYPE, DTYPE, DTYPE, SIZE>(src0, src1, src2);
block_store<DTYPE, SIZE>(RES, res);
res.copy_to(RES);
});
});
e.wait();
Expand Down
5 changes: 3 additions & 2 deletions SYCL/ESIMD/ext_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,11 @@ struct DeviceFunc {

void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
unsigned int Offset = I * VL * sizeof(float);
simd<float, VL> Vx = block_load<float, VL>(In, Offset);
simd<float, VL> Vx;
Vx.copy_from(In, Offset);
DeviceMathFunc<VL, Op> DevF{};
Vx = DevF(Vx);
block_store(Out, Offset, Vx);
Vx.copy_to(Out, Offset);
};
};

Expand Down
14 changes: 10 additions & 4 deletions SYCL/ESIMD/fp_args_size/Inputs/fp_args_size_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,22 @@ int main(void) {
simd<a_data_t, SIZE> va(0);
simd<b_data_t, SIZE> vb(0);
for (int j = 0; j < ROWS; j++) {
va.select<VL, 1>(j * VL) = block_load<a_data_t, VL>(A + j * VL);
vb.select<VL, 1>(j * VL) = block_load<b_data_t, VL>(B + j * VL);
simd<a_data_t, VL> a_data;
a_data.copy_from(A + j * VL);
va.select<VL, 1>(j * VL) = a_data;
simd<b_data_t, VL> b_data;
b_data.copy_from(B + j * VL);
vb.select<VL, 1>(j * VL) = b_data;
}

auto foo = &add<simd<a_data_t, SIZE>, simd<b_data_t, SIZE>,
simd<c_data_t, SIZE>>;
auto vc = foo(va, vb);

for (int j = 0; j < ROWS; j++)
block_store<c_data_t, VL>(C + j * VL, vc.select<VL, 1>(j * VL));
for (int j = 0; j < ROWS; j++) {
simd<c_data_t, VL> vals = vc.select<VL, 1>(j * VL);
vals.copy_to(C + j * VL);
}
});
});

Expand Down
7 changes: 4 additions & 3 deletions SYCL/ESIMD/histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,10 @@ int main(int argc, char *argv[]) {
bins, offset, src, 1);
offset += 8 * sizeof(unsigned int);
#else
auto vals = block_load<unsigned int, 8>(bins + i);
vals = vals + src;
block_store<unsigned int, 8>(bins + i, vals);
simd<unsigned int, 8> vals;
vals.copy_from(bins + i);
vals = vals + src;
vals.copy_to(bins + i);
#endif
}
});
Expand Down
3 changes: 2 additions & 1 deletion SYCL/ESIMD/histogram_256_slm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
auto start_off = (linear_id * BLOCK_WIDTH * NUM_BLOCKS);
for (int y = 0; y < NUM_BLOCKS; y++) {
auto start_addr = ((unsigned int *)input_ptr) + start_off;
auto data = block_load<uint, 32>(start_addr);
simd<uint, 32> data;
data.copy_from(start_addr);
auto in = data.format<uchar>();

#pragma unroll
Expand Down
3 changes: 2 additions & 1 deletion SYCL/ESIMD/histogram_256_slm_spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
auto start_off = (linear_id * BLOCK_WIDTH * num_blocks);
for (int y = 0; y < num_blocks; y++) {
auto start_addr = ((unsigned int *)input_ptr) + start_off;
auto data = block_load<uint, 32>(start_addr);
simd<uint, 32> data;
data.copy_from(start_addr);
auto in = data.format<uchar>();

#pragma unroll
Expand Down
5 changes: 3 additions & 2 deletions SYCL/ESIMD/histogram_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,10 @@ int main(int argc, char *argv[]) {
bins, offset, src, 1);
offset += 8 * sizeof(unsigned int);
#else
auto vals = block_load<unsigned int, 8>(bins + i);
simd<unsigned int, 8> vals;
vals.copy_from(bins + i);
vals = vals + src;
block_store<unsigned int, 8>(bins + i, vals);
vals.copy_to(bins + i);
#endif
}
});
Expand Down
5 changes: 3 additions & 2 deletions SYCL/ESIMD/histogram_raw_send.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,10 @@ int main(int argc, char *argv[]) {
bins, offset, src, 1);
offset += 8 * sizeof(unsigned int);
#else
auto vals = block_load<unsigned int, 8>(bins + i);
simd<unsigned int, 8> vals;
vals.copy_from(bins + i);
vals = vals + src;
block_store<unsigned int, 8>(bins + i, vals);
vals.copy_to(bins + i);
#endif
}
});
Expand Down
21 changes: 10 additions & 11 deletions SYCL/ESIMD/kmeans/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ int main(int argc, char *argv[]) {

#pragma unroll
for (int i = 0; i < NUM_CENTROIDS_ALLOCATED / SIMD_SIZE; i++) {
centroidsXYXY.row(i) =
block_load<float, 2 * SIMD_SIZE>(kcentroids4[i].xyn);
simd<float, 2 * SIMD_SIZE> data;
data.copy_from(kcentroids4[i].xyn);
centroidsXYXY.row(i) = data;
}

simd<float, NUM_CENTROIDS_ALLOCATED> accumxsum(0);
Expand All @@ -269,8 +270,7 @@ int main(int argc, char *argv[]) {
auto pointsXY = points.format<float, 2, SIMD_SIZE>();
simd<int, SIMD_SIZE> cluster(0);

points =
block_load<float, 2 * SIMD_SIZE>(kpoints4[index + i].xyn);
points.copy_from(kpoints4[index + i].xyn);

simd<float, SIMD_SIZE> dx =
pointsXY.row(0) - centroidsXY.row(0)[0];
Expand Down Expand Up @@ -309,7 +309,7 @@ int main(int argc, char *argv[]) {
min_dist.merge(dist, (dist < min_dist));
}

block_store<int, SIMD_SIZE>(kpoints4[index + i].cluster, cluster);
cluster.copy_to(kpoints4[index + i].cluster);

#pragma unroll
for (int k = 0; k < SIMD_SIZE; k++) {
Expand Down Expand Up @@ -352,14 +352,13 @@ int main(int argc, char *argv[]) {
unsigned int offset = 0;
for (int i = 0; i < (NUM_POINTS / POINTS_PER_THREAD) / SIMD_SIZE;
i++) {
simd<float, SIMD_SIZE> t = block_load<float, SIMD_SIZE>(
kaccum4[it.get_global_id(0)].x_sum + offset);
simd<float, SIMD_SIZE> t;
t.copy_from(kaccum4[it.get_global_id(0)].x_sum + offset);
xsum += t;
t = block_load<float, SIMD_SIZE>(
kaccum4[it.get_global_id(0)].y_sum + offset);
t.copy_from(kaccum4[it.get_global_id(0)].y_sum + offset);
ysum += t;
simd<int, SIMD_SIZE> n = block_load<int, SIMD_SIZE>(
kaccum4[it.get_global_id(0)].num_points + offset);
simd<int, SIMD_SIZE> n;
n.copy_from(kaccum4[it.get_global_id(0)].num_points + offset);
npoints += n;
offset += SIMD_SIZE;
}
Expand Down
7 changes: 5 additions & 2 deletions SYCL/ESIMD/matrix_transpose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ ESIMD_INLINE simd<T, N * N> read(AccessorTy buf, int MZ, int col, int row) {
uint32_t offset = (row * MZ + col) * sizeof(T);
#pragma unroll
for (int i = 0; i < N; ++i) {
res.template select<N, 1>(i * N) = block_load<T, N>(buf, offset);
simd<T, N> data;
data.copy_from(buf, offset);
res.template select<N, 1>(i * N) = data;
offset += MZ * sizeof(T);
}
return res;
Expand All @@ -128,7 +130,8 @@ ESIMD_INLINE void write(AccessorTy buf, int MZ, int col, int row,
uint32_t offset = (row * MZ + col) * sizeof(T);
#pragma unroll
for (int i = 0; i < N; ++i) {
block_store<T, N>(buf, offset, val.template select<N, 1>(i * N));
simd<T, N> vals = val.template select<N, 1>(i * N);
vals.copy_to(buf, offset);
offset += MZ * sizeof(T);
}
}
Expand Down
7 changes: 5 additions & 2 deletions SYCL/ESIMD/matrix_transpose_glb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ ESIMD_NOINLINE void read(int *buf, int MZ, int col, int row, int GrfIdx) {
buf += row * MZ + col;
#pragma unroll
for (int i = 0; i < N; ++i) {
res.template select<N, 1>(i * N) = block_load<int, N>(buf);
simd<int, N> data;
data.copy_from(buf);
res.template select<N, 1>(i * N) = data;
buf += MZ;
}
}
Expand All @@ -129,7 +131,8 @@ ESIMD_NOINLINE void write(int *buf, int MZ, int col, int row, int GrfIdx) {
buf += row * MZ + col;
#pragma unroll
for (int i = 0; i < N; ++i) {
block_store<int, N>(buf, val.template select<N, 1>(i * N));
simd<int, N> val2 = val.template select<N, 1>(i * N);
val2.copy_to(buf);
buf += MZ;
}
}
Expand Down
7 changes: 5 additions & 2 deletions SYCL/ESIMD/matrix_transpose_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ ESIMD_INLINE simd<T, N * N> read(T *buf, int MZ, int col, int row) {
buf += row * MZ + col;
#pragma unroll
for (int i = 0; i < N; ++i) {
res.template select<N, 1>(i * N) = block_load<T, N>(buf);
simd<T, N> data;
data.copy_from(buf);
res.template select<N, 1>(i * N) = data;
buf += MZ;
}
return res;
Expand All @@ -128,7 +130,8 @@ ESIMD_INLINE void write(T *buf, int MZ, int col, int row, simd<T, N * N> val) {
buf += row * MZ + col;
#pragma unroll
for (int i = 0; i < N; ++i) {
block_store<T, N>(buf, val.template select<N, 1>(i * N));
simd<T, N> vals = val.template select<N, 1>(i * N);
vals.copy_to(buf);
buf += MZ;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,21 @@ int main(void) {
simd<a_data_t, SIZE> va(0);
simd<b_data_t, SIZE> vb(0);
for (int j = 0; j < ROWS; j++) {
va.select<VL, 1>(j * VL) = block_load<a_data_t, VL>(A + j * VL);
vb.select<VL, 1>(j * VL) = block_load<b_data_t, VL>(B + j * VL);
simd<a_data_t, VL> a_data;
a_data.copy_from(A + j * VL);
va.select<VL, 1>(j * VL) = a_data;
simd<b_data_t, VL> b_data;
b_data.copy_from(B + j * VL);
vb.select<VL, 1>(j * VL) = b_data;
}

auto vc = add<simd<a_data_t, SIZE>, simd<b_data_t, SIZE>,
simd<c_data_t, SIZE>>(va, vb);

for (int j = 0; j < ROWS; j++)
block_store<c_data_t, VL>(C + j * VL, vc.select<VL, 1>(j * VL));
for (int j = 0; j < ROWS; j++) {
simd<c_data_t, VL> vals = vc.select<VL, 1>(j * VL);
vals.copy_to(C + j * VL);
}
});
});

Expand Down
2 changes: 1 addition & 1 deletion SYCL/ESIMD/private_memory/Inputs/pm_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ template <int CASE_NUM> int test() {
for (int j = 0; j < VL; j++)
val.select<1, 1>(j) += o[j];

block_store<int, VL>(output, val);
val.copy_to(output);
});
});
e.wait();
Expand Down
Loading