Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 71c06df

Browse files
authored
[ESIMD] Add LIT tests for -fsycl-esimd-force-stateless-mem (#1104)
* [ESIMD] Add LIT tests for -fsycl-esimd-force-stateless-mem This option automatically converts accessor-based memory accesses to stateless accesses. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 8e376a1 commit 71c06df

17 files changed

+836
-589
lines changed

SYCL/ESIMD/acc_gather_scatter_rgba.cpp

Lines changed: 1 addition & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -13,184 +13,4 @@
1313
// The test checks functionality of the gather_rgba/scatter_rgba accessor-based
1414
// ESIMD intrinsics.
1515

16-
#include "esimd_test_utils.hpp"
17-
18-
#include <iostream>
19-
#include <sycl/ext/intel/esimd.hpp>
20-
#include <sycl/sycl.hpp>
21-
22-
using namespace cl::sycl;
23-
24-
template <typename T>
25-
using AccT = accessor<T, 1, access_mode::read_write, access::target::device>;
26-
27-
constexpr int MASKED_LANE_NUM_REV = 1;
28-
constexpr int NUM_RGBA_CHANNELS =
29-
get_num_channels_enabled(sycl::ext::intel::esimd::rgba_channel_mask::ABGR);
30-
31-
template <typename T, unsigned VL, unsigned STRIDE, auto CH_MASK>
32-
struct Kernel {
33-
AccT<T> InAcc;
34-
AccT<T> OutAcc;
35-
Kernel(AccT<T> InAcc, AccT<T> OutAcc) : InAcc(InAcc), OutAcc(OutAcc) {}
36-
37-
void operator()(id<1> i) const SYCL_ESIMD_KERNEL {
38-
using namespace sycl::ext::intel::esimd;
39-
constexpr int numChannels = get_num_channels_enabled(CH_MASK);
40-
41-
// Every workitem accesses contiguous block of VL * STRIDE elements,
42-
// where each element consists of RGBA channels.
43-
uint32_t global_offset = i * VL * STRIDE * NUM_RGBA_CHANNELS * sizeof(T);
44-
45-
simd<uint32_t, VL> byteOffsets(0, STRIDE * sizeof(T) * NUM_RGBA_CHANNELS);
46-
simd<T, VL * numChannels> v;
47-
if constexpr (CH_MASK == rgba_channel_mask::ABGR)
48-
// Check that the default mask value is ABGR.
49-
v = gather_rgba(InAcc, byteOffsets, global_offset);
50-
else
51-
v = gather_rgba<CH_MASK>(InAcc, byteOffsets, global_offset);
52-
v += (int)i;
53-
54-
simd_mask<VL> pred = 1;
55-
pred[VL - MASKED_LANE_NUM_REV] = 0; // mask out the last lane
56-
scatter_rgba<CH_MASK>(OutAcc, byteOffsets, v, global_offset, pred);
57-
}
58-
};
59-
60-
std::string convertMaskToStr(sycl::ext::intel::esimd::rgba_channel_mask mask) {
61-
using namespace sycl::ext::intel::esimd;
62-
switch (mask) {
63-
case rgba_channel_mask::R:
64-
return "R";
65-
case rgba_channel_mask::GR:
66-
return "GR";
67-
case rgba_channel_mask::ABGR:
68-
return "ABGR";
69-
default:
70-
return "";
71-
}
72-
return "";
73-
}
74-
75-
template <typename T, unsigned VL, unsigned STRIDE, auto CH_MASK>
76-
bool test(queue q) {
77-
size_t numWorkItems = 2;
78-
size_t size = VL * STRIDE * NUM_RGBA_CHANNELS * numWorkItems;
79-
using namespace sycl::ext::intel::esimd;
80-
constexpr int numChannels = get_num_channels_enabled(CH_MASK);
81-
82-
std::cout << "Testing T=" << typeid(T).name() << " VL=" << VL
83-
<< " STRIDE=" << STRIDE << " MASK=" << convertMaskToStr(CH_MASK)
84-
<< "...\t";
85-
86-
T *A = new T[size];
87-
T *B = new T[size];
88-
T *gold = new T[size];
89-
90-
for (int i = 0; i < size; ++i) {
91-
A[i] = (T)i;
92-
B[i] = (T)-i;
93-
gold[i] = (T)-i;
94-
}
95-
96-
// Fill out the array with gold values. The kernel only writes the elements
97-
// that are not masked. For example,
98-
// for STRIDE=1 and MASK=R, we have the following indices written:
99-
// 0, 4, 8, 12 ...
100-
// for STRIDE=2 and MASK=RG, we have the following indices written:
101-
// 0, 1, 8, 9, 16, 17 ...
102-
// All the other elements will be equal to '-A[i]'.
103-
auto blockSize = VL * STRIDE * NUM_RGBA_CHANNELS;
104-
for (unsigned i = 0; i < size; i += NUM_RGBA_CHANNELS * STRIDE)
105-
for (unsigned j = 0; j < numChannels; j++)
106-
gold[i + j] = A[i + j] + (i / (blockSize));
107-
108-
// Account for masked out last lanes (with pred argument to scatter_rgba).
109-
auto maskedElementOffset = (VL - 1) * STRIDE * NUM_RGBA_CHANNELS;
110-
for (unsigned i = maskedElementOffset; i < size; i += blockSize)
111-
for (unsigned j = 0; j < numChannels; j++)
112-
gold[i + j] = -A[i + j];
113-
114-
try {
115-
buffer<T, 1> InBuf(A, range<1>(size));
116-
buffer<T, 1> OutBuf(B, range<1>(size));
117-
range<1> glob_range{numWorkItems};
118-
auto e = q.submit([&](handler &cgh) {
119-
auto InAcc = InBuf.template get_access<access::mode::read_write>(cgh);
120-
auto OutAcc = OutBuf.template get_access<access::mode::read_write>(cgh);
121-
Kernel<T, VL, STRIDE, CH_MASK> kernel(InAcc, OutAcc);
122-
cgh.parallel_for(glob_range, kernel);
123-
});
124-
e.wait();
125-
} catch (sycl::exception const &e) {
126-
std::cerr << "SYCL exception caught: " << e.what() << '\n';
127-
delete[] A;
128-
delete[] B;
129-
delete[] gold;
130-
return false; // not success
131-
}
132-
133-
int err_cnt = 0;
134-
for (unsigned i = 0; i < size; ++i) {
135-
if (B[i] != gold[i]) {
136-
if (++err_cnt < 35) {
137-
std::cout << "\nFAILED at index " << i << ": " << B[i]
138-
<< " != " << gold[i] << " (gold)";
139-
}
140-
}
141-
}
142-
143-
if (err_cnt > 0) {
144-
std::cout << "\n pass rate: "
145-
<< ((float)(size - err_cnt) / (float)size) * 100.0f << "% ("
146-
<< (size - err_cnt) << "/" << size << ")\n";
147-
}
148-
149-
delete[] A;
150-
delete[] B;
151-
delete[] gold;
152-
153-
if (err_cnt == 0)
154-
std::cout << "Passed\n";
155-
return err_cnt == 0;
156-
}
157-
158-
template <typename T, unsigned VL, unsigned STRIDE> bool test(queue q) {
159-
using namespace sycl::ext::intel::esimd;
160-
bool passed = true;
161-
passed &= test<T, VL, STRIDE, rgba_channel_mask::R>(q);
162-
passed &= test<T, VL, STRIDE, rgba_channel_mask::GR>(q);
163-
passed &= test<T, VL, STRIDE, rgba_channel_mask::ABGR>(q);
164-
return passed;
165-
}
166-
167-
int main(void) {
168-
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
169-
170-
auto dev = q.get_device();
171-
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
172-
173-
bool passed = true;
174-
passed &= test<int, 16, 1>(q);
175-
passed &= test<int, 16, 2>(q);
176-
passed &= test<int, 16, 4>(q);
177-
passed &= test<int, 32, 1>(q);
178-
passed &= test<int, 32, 3>(q);
179-
passed &= test<int, 32, 8>(q);
180-
passed &= test<float, 16, 1>(q);
181-
passed &= test<float, 16, 2>(q);
182-
passed &= test<float, 16, 4>(q);
183-
passed &= test<float, 32, 1>(q);
184-
passed &= test<float, 32, 3>(q);
185-
passed &= test<float, 32, 8>(q);
186-
187-
passed &= test<int, 8, 1>(q);
188-
passed &= test<int, 8, 3>(q);
189-
passed &= test<int, 8, 8>(q);
190-
passed &= test<float, 8, 1>(q);
191-
passed &= test<float, 8, 2>(q);
192-
passed &= test<float, 8, 4>(q);
193-
194-
std::cout << (passed ? "All tests passed.\n" : "Some tests failed!\n");
195-
return passed ? 0 : 1;
196-
}
16+
#include "acc_gather_scatter_rgba.hpp"
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
//==-------- acc_gather_scatter_rgba.hpp - DPC++ ESIMD on-device test -----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// The test checks functionality of the gather_rgba/scatter_rgba accessor-based
10+
// ESIMD intrinsics.
11+
12+
#include "esimd_test_utils.hpp"
13+
14+
#include <iostream>
15+
#include <sycl/ext/intel/esimd.hpp>
16+
#include <sycl/sycl.hpp>
17+
18+
using namespace cl::sycl;
19+
20+
template <typename T>
21+
using AccT = accessor<T, 1, access_mode::read_write, access::target::device>;
22+
23+
constexpr int MASKED_LANE_NUM_REV = 1;
24+
constexpr int NUM_RGBA_CHANNELS =
25+
get_num_channels_enabled(sycl::ext::intel::esimd::rgba_channel_mask::ABGR);
26+
27+
template <typename T, unsigned VL, unsigned STRIDE, auto CH_MASK>
28+
struct Kernel {
29+
AccT<T> InAcc;
30+
AccT<T> OutAcc;
31+
Kernel(AccT<T> InAcc, AccT<T> OutAcc) : InAcc(InAcc), OutAcc(OutAcc) {}
32+
33+
void operator()(id<1> i) const SYCL_ESIMD_KERNEL {
34+
using namespace sycl::ext::intel::esimd;
35+
constexpr int numChannels = get_num_channels_enabled(CH_MASK);
36+
37+
// Every workitem accesses contiguous block of VL * STRIDE elements,
38+
// where each element consists of RGBA channels.
39+
uint32_t global_offset = i * VL * STRIDE * NUM_RGBA_CHANNELS * sizeof(T);
40+
41+
simd<uint32_t, VL> byteOffsets(0, STRIDE * sizeof(T) * NUM_RGBA_CHANNELS);
42+
simd<T, VL * numChannels> v;
43+
if constexpr (CH_MASK == rgba_channel_mask::ABGR)
44+
// Check that the default mask value is ABGR.
45+
v = gather_rgba(InAcc, byteOffsets, global_offset);
46+
else
47+
v = gather_rgba<CH_MASK>(InAcc, byteOffsets, global_offset);
48+
v += (int)i;
49+
50+
simd_mask<VL> pred = 1;
51+
pred[VL - MASKED_LANE_NUM_REV] = 0; // mask out the last lane
52+
scatter_rgba<CH_MASK>(OutAcc, byteOffsets, v, global_offset, pred);
53+
}
54+
};
55+
56+
std::string convertMaskToStr(sycl::ext::intel::esimd::rgba_channel_mask mask) {
57+
using namespace sycl::ext::intel::esimd;
58+
switch (mask) {
59+
case rgba_channel_mask::R:
60+
return "R";
61+
case rgba_channel_mask::GR:
62+
return "GR";
63+
case rgba_channel_mask::ABGR:
64+
return "ABGR";
65+
default:
66+
return "";
67+
}
68+
return "";
69+
}
70+
71+
template <typename T, unsigned VL, unsigned STRIDE, auto CH_MASK>
72+
bool test(queue q) {
73+
size_t numWorkItems = 2;
74+
size_t size = VL * STRIDE * NUM_RGBA_CHANNELS * numWorkItems;
75+
using namespace sycl::ext::intel::esimd;
76+
constexpr int numChannels = get_num_channels_enabled(CH_MASK);
77+
78+
std::cout << "Testing T=" << typeid(T).name() << " VL=" << VL
79+
<< " STRIDE=" << STRIDE << " MASK=" << convertMaskToStr(CH_MASK)
80+
<< "...\t";
81+
82+
T *A = new T[size];
83+
T *B = new T[size];
84+
T *gold = new T[size];
85+
86+
for (int i = 0; i < size; ++i) {
87+
A[i] = (T)i;
88+
B[i] = (T)-i;
89+
gold[i] = (T)-i;
90+
}
91+
92+
// Fill out the array with gold values. The kernel only writes the elements
93+
// that are not masked. For example,
94+
// for STRIDE=1 and MASK=R, we have the following indices written:
95+
// 0, 4, 8, 12 ...
96+
// for STRIDE=2 and MASK=RG, we have the following indices written:
97+
// 0, 1, 8, 9, 16, 17 ...
98+
// All the other elements will be equal to '-A[i]'.
99+
auto blockSize = VL * STRIDE * NUM_RGBA_CHANNELS;
100+
for (unsigned i = 0; i < size; i += NUM_RGBA_CHANNELS * STRIDE)
101+
for (unsigned j = 0; j < numChannels; j++)
102+
gold[i + j] = A[i + j] + (i / (blockSize));
103+
104+
// Account for masked out last lanes (with pred argument to scatter_rgba).
105+
auto maskedElementOffset = (VL - 1) * STRIDE * NUM_RGBA_CHANNELS;
106+
for (unsigned i = maskedElementOffset; i < size; i += blockSize)
107+
for (unsigned j = 0; j < numChannels; j++)
108+
gold[i + j] = -A[i + j];
109+
110+
try {
111+
buffer<T, 1> InBuf(A, range<1>(size));
112+
buffer<T, 1> OutBuf(B, range<1>(size));
113+
range<1> glob_range{numWorkItems};
114+
auto e = q.submit([&](handler &cgh) {
115+
auto InAcc = InBuf.template get_access<access::mode::read_write>(cgh);
116+
auto OutAcc = OutBuf.template get_access<access::mode::read_write>(cgh);
117+
Kernel<T, VL, STRIDE, CH_MASK> kernel(InAcc, OutAcc);
118+
cgh.parallel_for(glob_range, kernel);
119+
});
120+
e.wait();
121+
} catch (sycl::exception const &e) {
122+
std::cerr << "SYCL exception caught: " << e.what() << '\n';
123+
delete[] A;
124+
delete[] B;
125+
delete[] gold;
126+
return false; // not success
127+
}
128+
129+
int err_cnt = 0;
130+
for (unsigned i = 0; i < size; ++i) {
131+
if (B[i] != gold[i]) {
132+
if (++err_cnt < 35) {
133+
std::cout << "\nFAILED at index " << i << ": " << B[i]
134+
<< " != " << gold[i] << " (gold)";
135+
}
136+
}
137+
}
138+
139+
if (err_cnt > 0) {
140+
std::cout << "\n pass rate: "
141+
<< ((float)(size - err_cnt) / (float)size) * 100.0f << "% ("
142+
<< (size - err_cnt) << "/" << size << ")\n";
143+
}
144+
145+
delete[] A;
146+
delete[] B;
147+
delete[] gold;
148+
149+
if (err_cnt == 0)
150+
std::cout << "Passed\n";
151+
return err_cnt == 0;
152+
}
153+
154+
template <typename T, unsigned VL, unsigned STRIDE> bool test(queue q) {
155+
using namespace sycl::ext::intel::esimd;
156+
bool passed = true;
157+
passed &= test<T, VL, STRIDE, rgba_channel_mask::R>(q);
158+
passed &= test<T, VL, STRIDE, rgba_channel_mask::GR>(q);
159+
passed &= test<T, VL, STRIDE, rgba_channel_mask::ABGR>(q);
160+
return passed;
161+
}
162+
163+
int main(void) {
164+
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
165+
166+
auto dev = q.get_device();
167+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
168+
169+
bool passed = true;
170+
passed &= test<int, 16, 1>(q);
171+
passed &= test<int, 16, 2>(q);
172+
passed &= test<int, 16, 4>(q);
173+
passed &= test<int, 32, 1>(q);
174+
passed &= test<int, 32, 3>(q);
175+
passed &= test<int, 32, 8>(q);
176+
passed &= test<float, 16, 1>(q);
177+
passed &= test<float, 16, 2>(q);
178+
passed &= test<float, 16, 4>(q);
179+
passed &= test<float, 32, 1>(q);
180+
passed &= test<float, 32, 3>(q);
181+
passed &= test<float, 32, 8>(q);
182+
183+
passed &= test<int, 8, 1>(q);
184+
passed &= test<int, 8, 3>(q);
185+
passed &= test<int, 8, 8>(q);
186+
passed &= test<float, 8, 1>(q);
187+
passed &= test<float, 8, 2>(q);
188+
passed &= test<float, 8, 4>(q);
189+
190+
std::cout << (passed ? "All tests passed.\n" : "Some tests failed!\n");
191+
return passed ? 0 : 1;
192+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//==- acc_gather_scatter_rgba_stateless.cpp - DPC++ ESIMD on-device test --==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl -fsycl-esimd-force-stateless-mem %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
//
13+
// The test checks functionality of the gather_rgba/scatter_rgba accessor-based
14+
// ESIMD intrinsics when stateless memory accesses are enforced, i.e. accessor
15+
// based accesses are automatically converted to stateless accesses.
16+
17+
#include "acc_gather_scatter_rgba.hpp"

0 commit comments

Comments
 (0)