Skip to content

Commit ea6affb

Browse files
authored
[SYCL][ESIMD] Update embargo ESIMD tests after recent API changes (intel#791)
Tests need to be updated after - Moving ESIMD API out of experimental namespace - Unembargoing lsc, nbarrier, dpas APIs
1 parent 5e7b4c3 commit ea6affb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+308
-289
lines changed

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/SparseMatrixMul_pvc/SparseMatrixMul_pvc.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include <string>
2323

2424
#include <CL/sycl.hpp>
25-
#include <sycl/ext/intel/experimental/esimd.hpp>
25+
#include <sycl/ext/intel/esimd.hpp>
2626

2727
#ifdef DUMP_ENABLE
2828
#define DUMP(x) std::cout << x
@@ -77,7 +77,7 @@ struct CsrSparseMatrix {
7777

7878
using ushort = unsigned short;
7979
using namespace cl::sycl;
80-
using namespace sycl::ext::intel::experimental::esimd;
80+
using namespace sycl::ext::intel::esimd;
8181
using namespace std;
8282

8383
using IndexType = unsigned int;

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/api/fp_conversions_ats.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
#include "../../esimd_test_utils.hpp"
1717

1818
#include <CL/sycl.hpp>
19-
#include <sycl/ext/intel/experimental/esimd.hpp>
2019
#include <iostream>
20+
#include <sycl/ext/intel/esimd.hpp>
2121

2222
using namespace cl::sycl;
23+
using namespace sycl::ext::intel::esimd;
2324
using namespace sycl::ext::intel::experimental::esimd;
2425

2526
template <int N> class Test;

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/dpas_ats.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#include "../esimd_test_utils.hpp"
1414

1515
#include <CL/sycl.hpp>
16-
#include <sycl/ext/intel/experimental/esimd.hpp>
1716
#include <iostream>
17+
#include <sycl/ext/intel/esimd.hpp>
1818

1919
using namespace cl::sycl;
2020

@@ -39,28 +39,29 @@ int main(void) {
3939
nd_range<1> Range(GroupRange, TaskRange);
4040

4141
q.submit([&](handler &cgh) {
42-
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43-
using namespace sycl::ext::intel::experimental::esimd;
44-
45-
simd<char, Size * 2> va(0);
46-
auto ma = va.bit_cast_view<char, 8, 16>();
47-
ma.select<2, 1, 4, 4>(0, 0) = 4;
48-
49-
simd<char, 8 * 16> vb(0);
50-
auto mb = vb.bit_cast_view<char, 8, 16>();
51-
mb.select<8, 1, 1, 1>(0, 0) = 4;
52-
53-
simd<int, Size> vc(0);
54-
vc = dpas<EsimdPrecisionType::S2, EsimdPrecisionType::S2, 8, 8, int,
55-
int, int, Size, 32, 32>(vc, ma.bit_cast_view<int>(),
56-
mb.bit_cast_view<int>());
57-
58-
for (int i = 0; i < Size; i += VL) {
59-
simd<int, VL> output = vc.select<VL, 1>(i);
60-
output.copy_to(C + i);
61-
}
62-
});
63-
}).wait();
42+
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43+
using namespace sycl::ext::intel::esimd;
44+
using namespace sycl::ext::intel::experimental::esimd;
45+
46+
simd<char, Size * 2> va(0);
47+
auto ma = va.bit_cast_view<char, 8, 16>();
48+
ma.select<2, 1, 4, 4>(0, 0) = 4;
49+
50+
simd<char, 8 * 16> vb(0);
51+
auto mb = vb.bit_cast_view<char, 8, 16>();
52+
mb.select<8, 1, 1, 1>(0, 0) = 4;
53+
54+
simd<int, Size> vc(0);
55+
vc =
56+
dpas<argument_type::S2, argument_type::S2, 8, 8, int, int, int, Size,
57+
32, 32>(vc, ma.bit_cast_view<int>(), mb.bit_cast_view<int>());
58+
59+
for (int i = 0; i < Size; i += VL) {
60+
simd<int, VL> output = vc.select<VL, 1>(i);
61+
output.copy_to(C + i);
62+
}
63+
});
64+
}).wait();
6465

6566
int err_cnt = 0;
6667
for (unsigned i = 0; i < Size && err_cnt < 10; ++i)

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/dpas_pvc.cpp

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
//===----------------------------------------------------------------------===//
88
// REQUIRES: gpu
99
// UNSUPPORTED: cuda
10-
// RUN: %clangxx -fsycl -DESIMD_GEN12_7 %s -o %t.out
10+
// RUN: %clangxx -fsycl -DESIMD_XE_HPG %s -o %t.out
1111
// RUN: %GPU_RUN_PLACEHOLDER %t.out
1212

1313
#include "../esimd_test_utils.hpp"
1414

1515
#include <CL/sycl.hpp>
16-
#include <sycl/ext/intel/experimental/esimd.hpp>
1716
#include <iostream>
17+
#include <sycl/ext/intel/esimd.hpp>
1818

1919
using namespace cl::sycl;
2020

@@ -39,28 +39,29 @@ int main(void) {
3939
nd_range<1> Range(GroupRange, TaskRange);
4040

4141
q.submit([&](handler &cgh) {
42-
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43-
using namespace sycl::ext::intel::experimental::esimd;
44-
45-
simd<char, Size * 2> va(0);
46-
auto ma = va.bit_cast_view<char, 8, 32>();
47-
ma.select<2, 1, 8, 4>(0, 0) = 4;
48-
49-
simd<char, Size> vb(0);
50-
auto mb = vb.bit_cast_view<char, 8, 16>();
51-
mb.select<8, 1, 1, 1>(0, 0) = 4;
52-
53-
simd<int, Size> vc(0);
54-
vc = dpas<EsimdPrecisionType::S2, EsimdPrecisionType::S2, 8, 8, int,
55-
int, int, Size, 64, 32>(vc, ma.bit_cast_view<int>(),
56-
mb.bit_cast_view<int>());
57-
58-
for (int i = 0; i < Size; i += VL) {
59-
simd<int, VL> output = vc.select<VL, 1>(i);
60-
output.copy_to(C + i);
61-
}
62-
});
63-
}).wait();
42+
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43+
using namespace sycl::ext::intel::esimd;
44+
using namespace sycl::ext::intel::experimental::esimd;
45+
46+
simd<char, Size * 2> va(0);
47+
auto ma = va.bit_cast_view<char, 8, 32>();
48+
ma.select<2, 1, 8, 4>(0, 0) = 4;
49+
50+
simd<char, Size> vb(0);
51+
auto mb = vb.bit_cast_view<char, 8, 16>();
52+
mb.select<8, 1, 1, 1>(0, 0) = 4;
53+
54+
simd<int, Size> vc(0);
55+
vc =
56+
dpas<argument_type::S2, argument_type::S2, 8, 8, int, int, int, Size,
57+
64, 32>(vc, ma.bit_cast_view<int>(), mb.bit_cast_view<int>());
58+
59+
for (int i = 0; i < Size; i += VL) {
60+
simd<int, VL> output = vc.select<VL, 1>(i);
61+
output.copy_to(C + i);
62+
}
63+
});
64+
}).wait();
6465

6566
int err_cnt = 0;
6667
for (unsigned i = 0; i < Size && err_cnt < 10; ++i)

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/dpasw_ats.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#include "../esimd_test_utils.hpp"
1414

1515
#include <CL/sycl.hpp>
16-
#include <sycl/ext/intel/experimental/esimd.hpp>
1716
#include <iostream>
17+
#include <sycl/ext/intel/esimd.hpp>
1818

1919
using namespace cl::sycl;
2020

@@ -39,28 +39,29 @@ int main(void) {
3939
nd_range<1> Range(GroupRange, TaskRange);
4040

4141
q.submit([&](handler &cgh) {
42-
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43-
using namespace sycl::ext::intel::experimental::esimd;
44-
45-
simd<char, Size * 2> va(0);
46-
auto ma = va.bit_cast_view<char, 8, 16>();
47-
ma.select<2, 1, 4, 4>(0, 0) = 4;
48-
49-
simd<char, 8 * 8> vb(0);
50-
auto mb = vb.bit_cast_view<char, 8, 8>();
51-
mb.select<4, 2, 1, 1>(0, 0) = 4;
52-
53-
simd<int, Size> vc(0);
54-
vc = dpasw<EsimdPrecisionType::S2, EsimdPrecisionType::S2, 8, 8,
55-
int, int, int, Size, 32, 16>(vc, ma.bit_cast_view<int>(),
56-
mb.bit_cast_view<int>());
57-
58-
for (int i = 0; i < Size; i += VL) {
59-
simd<int, VL> output = vc.select<VL, 1>(i);
60-
output.copy_to(C + i);
61-
}
62-
});
63-
}).wait();
42+
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
43+
using namespace sycl::ext::intel::esimd;
44+
using namespace sycl::ext::intel::experimental::esimd;
45+
46+
simd<char, Size * 2> va(0);
47+
auto ma = va.bit_cast_view<char, 8, 16>();
48+
ma.select<2, 1, 4, 4>(0, 0) = 4;
49+
50+
simd<char, 8 * 8> vb(0);
51+
auto mb = vb.bit_cast_view<char, 8, 8>();
52+
mb.select<4, 2, 1, 1>(0, 0) = 4;
53+
54+
simd<int, Size> vc(0);
55+
vc = dpasw<argument_type::S2, argument_type::S2, 8, 8, int, int, int,
56+
Size, 32, 16>(vc, ma.bit_cast_view<int>(),
57+
mb.bit_cast_view<int>());
58+
59+
for (int i = 0; i < Size; i += VL) {
60+
simd<int, VL> output = vc.select<VL, 1>(i);
61+
output.copy_to(C + i);
62+
}
63+
});
64+
}).wait();
6465

6566
int err_cnt = 0;
6667
for (unsigned i = 0; i < Size && err_cnt < 10; ++i)

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/histogram_2d_ats.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
#include "../esimd_test_utils.hpp"
1414

1515
#include <CL/sycl.hpp>
16-
#include <sycl/ext/intel/experimental/esimd.hpp>
1716
#include <array>
1817
#include <iostream>
18+
#include <sycl/ext/intel/esimd.hpp>
1919

2020
using namespace cl::sycl;
2121

@@ -147,7 +147,7 @@ int main(int argc, char *argv[]) {
147147

148148
cgh.parallel_for<class Hist>(
149149
Range, [=](nd_item<2> ndi) SYCL_ESIMD_KERNEL {
150-
using namespace sycl::ext::intel::experimental::esimd;
150+
using namespace sycl::ext::intel::esimd;
151151

152152
// Get thread origin offsets
153153
uint h_pos = ndi.get_group(0) * BLOCK_WIDTH;
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]) {
190190
src = histogram.select<8, 1>(i);
191191

192192
#ifdef __SYCL_DEVICE_ONLY__
193-
atomic_update<sycl::ext::intel::experimental::esimd::atomic_op::add, unsigned int, 8>(
194-
bins, offset, src, 1);
193+
atomic_update<sycl::ext::intel::esimd::atomic_op::add,
194+
unsigned int, 8>(bins, offset, src, 1);
195195
offset += 8 * sizeof(unsigned int);
196196
#else
197197
simd<unsigned int, 8> vals;

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/histogram_raw_send_ats.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313
#include "../esimd_test_utils.hpp"
1414

1515
#include <CL/sycl.hpp>
16-
#include <sycl/ext/intel/experimental/esimd.hpp>
1716
#include <array>
1817
#include <iostream>
18+
#include <sycl/ext/intel/esimd.hpp>
1919

2020
using namespace cl::sycl;
21+
using namespace sycl::ext::intel::esimd;
2122
using namespace sycl::ext::intel::experimental::esimd;
2223

2324
#define NUM_BINS 256
@@ -61,7 +62,7 @@ int checkHistogram(unsigned int *refHistogram, unsigned int *hist) {
6162
return 1;
6263
}
6364

64-
template <EsimdAtomicOpType Op, typename T, int n>
65+
template <atomic_op Op, typename T, int n>
6566
ESIMD_INLINE void atomic_write(T *bins, simd<unsigned, n> offset,
6667
simd<T, n> src0, simd_mask<n> pred) {
6768
simd<T, n> oldDst;
@@ -216,8 +217,8 @@ int main(int argc, char *argv[]) {
216217
#ifdef __SYCL_DEVICE_ONLY__
217218
// flat_atomic<EsimdAtomicOpType::ATOMIC_ADD, unsigned int,
218219
// 8>(bins, offset, src, 1);
219-
atomic_write<sycl::ext::intel::experimental::esimd::atomic_op::add, unsigned int, 8>(
220-
bins, offset, src, 1);
220+
atomic_update<sycl::ext::intel::esimd::atomic_op::add,
221+
unsigned int, 8>(bins, offset, src, 1);
221222
offset += 8 * sizeof(unsigned int);
222223
#else
223224
simd<unsigned int, 8> vals;

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc/Inputs/lsc_block_load.hpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
#include <CL/sycl.hpp>
2-
#include <sycl/ext/intel/experimental/esimd.hpp>
2+
#include <sycl/ext/intel/esimd.hpp>
33

44
#include <iostream>
55

66
#include "common.hpp"
77

88
using namespace cl::sycl;
9+
using namespace sycl::ext::intel::esimd;
10+
using namespace sycl::ext::intel::esimd::detail;
911
using namespace sycl::ext::intel::experimental::esimd;
1012
using namespace sycl::ext::intel::experimental::esimd::detail;
1113

1214
template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
1315
int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1416
bool Transposed = false, bool Transformed = false,
15-
CacheHint L1H = CacheHint::None, CacheHint L3H = CacheHint::None,
17+
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
1618
bool use_prefetch = false>
1719
bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
1820
int X, int Y) {
@@ -77,7 +79,7 @@ bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
7779
* and rounds up BlockHeight.
7880
*/
7981
constexpr int SH = Transformed
80-
? roundUpNextMultiple(BlockHeight, 4 / sizeof(T))
82+
? roundUpNextMultiple<BlockHeight, 4 / sizeof(T)>()
8183
: BlockHeight;
8284
constexpr int SW = Transformed ? getNextPowerOf2<BlockWidth>() : BlockWidth;
8385
constexpr int SN = get_lsc_block_2d_data_size<T, 1u, 1u, SW, false, false>();
@@ -126,23 +128,22 @@ bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
126128

127129
simd<T, N> vals;
128130
if constexpr (use_prefetch) {
129-
lsc_flat_prefetch2d<T, BlockWidth, BlockHeight, NBlocks,
130-
Transposed, Transformed, L1H, L3H>(
131-
in + off, width, height, pitch, X, Y);
132-
vals = lsc_flat_load2d<T, BlockWidth, BlockHeight, NBlocks,
133-
Transposed, Transformed>(
131+
lsc_prefetch2d<T, BlockWidth, BlockHeight, NBlocks, L1H, L3H>(
134132
in + off, width, height, pitch, X, Y);
133+
vals =
134+
lsc_load2d<T, BlockWidth, BlockHeight, NBlocks, Transposed,
135+
Transformed>(in + off, width, height, pitch, X, Y);
135136
} else {
136-
vals = lsc_flat_load2d<T, BlockWidth, BlockHeight, NBlocks,
137-
Transposed, Transformed, L1H, L3H>(
138-
in + off, width, height, pitch, X, Y);
137+
vals = lsc_load2d<T, BlockWidth, BlockHeight, NBlocks, Transposed,
138+
Transformed, L1H, L3H>(in + off, width, height,
139+
pitch, X, Y);
139140
}
140141

141142
for (int i = 0; i < NBlocks; i++) {
142143
for (int j = 0; j < SH; j++) {
143144
simd<T, SN> v =
144145
vals.template select<SN, 1>(i * SN * SH + j * SW);
145-
lsc_flat_store2d<T, SW>(
146+
lsc_store2d<T, SW>(
146147
out + off, SurfaceWidth * sizeof(T) - 1, SurfaceHeight - 1,
147148
SurfacePitch * sizeof(T) - 1, X + i * SW, Y + j, v);
148149
}

SYCL_ESIMD_EMBARGO/ESIMD/EMBARGO/lsc/Inputs/lsc_block_store.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
#include <CL/sycl.hpp>
2-
#include <sycl/ext/intel/experimental/esimd.hpp>
2+
#include <sycl/ext/intel/esimd.hpp>
33

44
#include <iostream>
55

66
#include "common.hpp"
77

88
using namespace cl::sycl;
9+
using namespace sycl::ext::intel::esimd;
910
using namespace sycl::ext::intel::experimental::esimd;
11+
using namespace sycl::ext::intel::experimental::esimd::detail;
1012

1113
template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
1214
int BlockWidth, int BlockHeight = 1,
1315
int N = get_lsc_block_2d_data_size<T, 1u, BlockHeight, BlockWidth,
1416
false, false>(),
15-
CacheHint L1H = CacheHint::None, CacheHint L3H = CacheHint::None>
17+
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
1618
bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
1719
int X, int Y) {
1820
static_assert(BlockWidth > 0, "Block width must be positive");
@@ -55,10 +57,9 @@ bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch,
5557

5658
simd<T, N> vals(new_val + off, 1);
5759
// IUT
58-
lsc_flat_store2d<T, BlockWidth, BlockHeight, false, false, L1H,
59-
L3H>(out + off, SurfaceWidth * sizeof(T) - 1,
60-
SurfaceHeight - 1,
61-
SurfacePitch * sizeof(T) - 1, X, Y, vals);
60+
lsc_store2d<T, BlockWidth, BlockHeight, L1H, L3H>(
61+
out + off, SurfaceWidth * sizeof(T) - 1, SurfaceHeight - 1,
62+
SurfacePitch * sizeof(T) - 1, X, Y, vals);
6263
});
6364
});
6465
e.wait();

0 commit comments

Comments
 (0)