Skip to content

Commit 7179d3a

Browse files
MochalovaAnAlexeySachkov
authored andcommitted
[SYCL] Add tests for inline asm feature
Signed-off-by: amochalo <[email protected]>
1 parent eeca872 commit 7179d3a

22 files changed

+1679
-0
lines changed

sycl/test/inline-asm/asm_16_empty.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// UNSUPPORTED: cuda
2+
// REQUIRES: gpu,linux
3+
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
4+
// RUN: %t.out
5+
6+
#include "include/asmcheck.h"
7+
#include <CL/sycl.hpp>
8+
#include <iostream>
9+
#include <string>
10+
#include <vector>
11+
12+
constexpr int LIST_SIZE = 1024;
13+
using arr_t = std::vector<cl::sycl::cl_int>;
14+
constexpr auto sycl_write = cl::sycl::access::mode::write;
15+
16+
// class is used for kernel name
17+
template <typename T>
18+
class no_opts;
19+
20+
template <typename T>
21+
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
22+
cl::sycl::range<1> numOfItems{sz};
23+
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
24+
25+
deviceQueue.submit([&](cl::sycl::handler &cgh) {
26+
auto C = bufferC.template get_access<sycl_write>(cgh);
27+
28+
auto kern = [C](cl::sycl::id<1> wiID)
29+
[[cl::intel_reqd_sub_group_size(16)]] {
30+
C[wiID] = 43;
31+
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
32+
asm volatile("");
33+
#endif
34+
};
35+
cgh.parallel_for<class no_opts<T>>(numOfItems, kern);
36+
});
37+
};
38+
39+
int main() {
40+
arr_t C(LIST_SIZE);
41+
42+
cl::sycl::gpu_selector gpsel;
43+
cl::sycl::queue deviceQueue(gpsel);
44+
45+
sycl::device Device = deviceQueue.get_device();
46+
47+
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
48+
std::cout << "Skipping test\n";
49+
return 0;
50+
}
51+
52+
for (int i = 0; i < LIST_SIZE; i++) {
53+
C[i] = 0;
54+
}
55+
56+
process_buffers(deviceQueue, C.data(), LIST_SIZE);
57+
58+
bool all_right = true;
59+
60+
for (int i = 0; i < LIST_SIZE; ++i)
61+
if (C[i] != 43) {
62+
std::cerr << "At index: " << i << ". ";
63+
std::cerr << C[i] << " != " << 43 << "\n";
64+
all_right = false;
65+
break;
66+
}
67+
if (all_right) {
68+
std::cout << "Pass" << std::endl;
69+
return 0;
70+
}
71+
std::cout << "Error" << std::endl;
72+
return -1;
73+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// UNSUPPORTED: cuda
2+
// REQUIRES: gpu,linux
3+
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
4+
// RUN: %t.out
5+
6+
#include "include/asmcheck.h"
7+
#include <CL/sycl.hpp>
8+
#include <iostream>
9+
#include <vector>
10+
11+
constexpr int LIST_SIZE = 8;
12+
using arr_t = std::vector<cl::sycl::cl_int>;
13+
constexpr auto sycl_write = cl::sycl::access::mode::write;
14+
15+
// class is used for kernel name
16+
template <typename T>
17+
class simple_vector_add;
18+
19+
template <typename T>
20+
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21+
cl::sycl::range<1> numOfItems{sz};
22+
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23+
24+
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25+
auto C = bufferC.template get_access<sycl_write>(cgh);
26+
27+
auto kern = [C](cl::sycl::id<1> wiID)
28+
[[cl::intel_reqd_sub_group_size(16)]] {
29+
volatile int output = 0;
30+
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31+
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
32+
: "=rw"(output));
33+
#else
34+
output = 7;
35+
#endif
36+
C[wiID] = output;
37+
};
38+
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
39+
});
40+
};
41+
42+
int main() {
43+
arr_t C(LIST_SIZE);
44+
45+
cl::sycl::gpu_selector gpsel;
46+
cl::sycl::queue deviceQueue(gpsel);
47+
sycl::device Device = deviceQueue.get_device();
48+
49+
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
50+
std::cout << "Skipping test\n";
51+
return 0;
52+
}
53+
for (int i = 0; i < LIST_SIZE; i++) {
54+
C[i] = 0;
55+
}
56+
57+
process_buffers(deviceQueue, C.data(), LIST_SIZE);
58+
59+
bool all_right = true;
60+
for (int i = 0; i < LIST_SIZE; ++i)
61+
if (C[i] != 7) {
62+
std::cerr << "At index: " << i << ". ";
63+
std::cerr << C[i] << " != " << 7 << "\n";
64+
all_right = false;
65+
break;
66+
}
67+
if (all_right) {
68+
std::cout << "Pass" << std::endl;
69+
return 0;
70+
}
71+
std::cout << "Error" << std::endl;
72+
return -1;
73+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// UNSUPPORTED: cuda
2+
// REQUIRES: gpu,linux
3+
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
4+
// RUN: %t.out
5+
6+
#include "include/asmcheck.h"
7+
#include <CL/sycl.hpp>
8+
#include <iostream>
9+
#include <vector>
10+
11+
constexpr int LIST_SIZE = 8;
12+
using arr_t = std::vector<cl::sycl::cl_int>;
13+
constexpr auto sycl_write = cl::sycl::access::mode::write;
14+
15+
// class is used for kernel name
16+
template <typename T>
17+
class simple_vector_add;
18+
19+
template <typename T>
20+
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21+
cl::sycl::range<1> numOfItems{sz};
22+
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23+
24+
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25+
auto C = bufferC.template get_access<sycl_write>(cgh);
26+
27+
auto kern = [C](cl::sycl::id<1> wiID)
28+
[[cl::intel_reqd_sub_group_size(16)]] {
29+
volatile int output = 0;
30+
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31+
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
32+
: "=rw"(output));
33+
#else
34+
output = 7;
35+
#endif
36+
C[wiID] = output;
37+
};
38+
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
39+
});
40+
};
41+
42+
int main() {
43+
arr_t C(LIST_SIZE);
44+
45+
cl::sycl::gpu_selector gpsel;
46+
cl::sycl::queue deviceQueue(gpsel);
47+
sycl::device Device = deviceQueue.get_device();
48+
49+
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
50+
std::cout << "Skipping test\n";
51+
return 0;
52+
}
53+
for (int i = 0; i < LIST_SIZE; i++) {
54+
C[i] = 0;
55+
}
56+
57+
process_buffers(deviceQueue, C.data(), LIST_SIZE);
58+
59+
bool all_right = true;
60+
for (int i = 0; i < LIST_SIZE; ++i)
61+
if (C[i] != 7) {
62+
std::cerr << "At index: " << i << ". ";
63+
std::cerr << C[i] << " != " << 7 << "\n";
64+
all_right = false;
65+
break;
66+
}
67+
if (all_right) {
68+
std::cout << "Pass" << std::endl;
69+
return 0;
70+
}
71+
std::cout << "Error" << std::endl;
72+
return -1;
73+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// UNSUPPORTED: cuda
2+
// REQUIRES: gpu,linux
3+
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
4+
// RUN: %t.out
5+
6+
#include "include/asmcheck.h"
7+
#include <CL/sycl.hpp>
8+
#include <iostream>
9+
#include <vector>
10+
11+
constexpr int LIST_SIZE = 1024;
12+
using arr_t = std::vector<cl::sycl::cl_int>;
13+
constexpr auto sycl_write = cl::sycl::access::mode::write;
14+
15+
// class is used for kernel name
16+
template <typename T>
17+
class simple_vector_add;
18+
19+
template <typename T>
20+
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21+
cl::sycl::range<1> numOfItems{sz};
22+
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23+
24+
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25+
auto C = bufferC.template get_access<sycl_write>(cgh);
26+
27+
auto kern = [C](cl::sycl::id<1> wiID)
28+
[[cl::intel_reqd_sub_group_size(16)]] {
29+
for (int i = 0; i < 10; ++i) {
30+
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31+
asm("fence_sw");
32+
C[wiID] += i;
33+
34+
#else
35+
C[wiID] += i;
36+
#endif
37+
}
38+
};
39+
cgh.parallel_for<class simple_vector_add<T>>(numOfItems, kern);
40+
});
41+
};
42+
43+
int main() {
44+
arr_t C(LIST_SIZE);
45+
46+
cl::sycl::gpu_selector gpsel;
47+
cl::sycl::queue deviceQueue(gpsel);
48+
sycl::device Device = deviceQueue.get_device();
49+
50+
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
51+
std::cout << "Skipping test\n";
52+
return 0;
53+
}
54+
for (int i = 0; i < LIST_SIZE; i++) {
55+
C[i] = 0;
56+
}
57+
58+
process_buffers(deviceQueue, C.data(), LIST_SIZE);
59+
60+
bool all_right = true;
61+
62+
for (int i = 0; i < LIST_SIZE; ++i)
63+
if (C[i] != 45) {
64+
std::cerr << "At index: " << i << ". ";
65+
std::cerr << C[i] << " != " << 45 << "\n";
66+
all_right = false;
67+
break;
68+
}
69+
if (all_right) {
70+
std::cout << "Pass" << std::endl;
71+
return 0;
72+
}
73+
std::cout << "Error" << std::endl;
74+
return -1;
75+
}

sycl/test/inline-asm/asm_8_empty.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// UNSUPPORTED: cuda
2+
// REQUIRES: gpu,linux
3+
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
4+
// RUN: %t.out
5+
6+
#include "include/asmcheck.h"
7+
#include <CL/sycl.hpp>
8+
#include <iostream>
9+
#include <vector>
10+
11+
constexpr int LIST_SIZE = 1024;
12+
using arr_t = std::vector<cl::sycl::cl_int>;
13+
constexpr auto sycl_write = cl::sycl::access::mode::write;
14+
15+
// class is used for kernel name
16+
template <typename T>
17+
class no_opts;
18+
19+
template <typename T>
20+
void process_buffers(cl::sycl::queue &deviceQueue, T *pc, size_t sz) {
21+
cl::sycl::range<1> numOfItems{sz};
22+
cl::sycl::buffer<T, 1> bufferC(pc, numOfItems);
23+
24+
deviceQueue.submit([&](cl::sycl::handler &cgh) {
25+
auto C = bufferC.template get_access<sycl_write>(cgh);
26+
27+
auto kern = [C](cl::sycl::id<1> wiID)
28+
[[cl::intel_reqd_sub_group_size(8)]] {
29+
C[wiID] = 43;
30+
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
31+
asm volatile("");
32+
#endif
33+
};
34+
cgh.parallel_for<class no_opts<T>>(numOfItems, kern);
35+
});
36+
};
37+
38+
int main() {
39+
arr_t C(LIST_SIZE);
40+
41+
cl::sycl::gpu_selector gpsel;
42+
cl::sycl::queue deviceQueue(gpsel);
43+
44+
sycl::device Device = deviceQueue.get_device();
45+
46+
if (!isInlineASMSupported(Device) || !Device.has_extension("cl_intel_required_subgroup_size")) {
47+
std::cout << "Skipping test\n";
48+
return 0;
49+
}
50+
51+
for (int i = 0; i < LIST_SIZE; i++) {
52+
C[i] = 0;
53+
}
54+
55+
process_buffers(deviceQueue, C.data(), LIST_SIZE);
56+
57+
bool all_right = true;
58+
59+
for (int i = 0; i < LIST_SIZE; ++i)
60+
if (C[i] != 43) {
61+
std::cerr << "At index: " << i << ". ";
62+
std::cerr << C[i] << " != " << 43 << "\n";
63+
all_right = false;
64+
break;
65+
}
66+
if (all_right) {
67+
std::cout << "Pass" << std::endl;
68+
return 0;
69+
}
70+
std::cout << "Error" << std::endl;
71+
return -1;
72+
}

0 commit comments

Comments
 (0)