|
| 1 | +// This is a basic acceptance test for the inline ASM feature. More tests can be |
| 2 | +// found in https://github.com/intel/llvm-test-suite/tree/intel/SYCL/InlineAsm |
| 3 | +// RUN: %clangxx -fsycl %s -o %t.out |
| 4 | + |
| 5 | +#include <CL/sycl.hpp> |
| 6 | +#include <cmath> |
| 7 | +#include <iostream> |
| 8 | +#include <vector> |
| 9 | + |
| 10 | +constexpr const size_t DEFAULT_PROBLEM_SIZE = 16; |
| 11 | + |
| 12 | +using DataType = sycl::cl_int; |
| 13 | + |
| 14 | +int main() { |
| 15 | + DataType DataA[DEFAULT_PROBLEM_SIZE], DataB[DEFAULT_PROBLEM_SIZE], |
| 16 | + DataC[DEFAULT_PROBLEM_SIZE]; |
| 17 | + for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) { |
| 18 | + DataA[i] = i; |
| 19 | + DataB[i] = 2 * i; |
| 20 | + } |
| 21 | + |
| 22 | + // Create a simple asynchronous exception handler. |
| 23 | + auto AsyncHandler = [](sycl::exception_list ExceptionList) { |
| 24 | + for (auto &Exception : ExceptionList) { |
| 25 | + std::rethrow_exception(Exception); |
| 26 | + } |
| 27 | + }; |
| 28 | + |
| 29 | + { |
| 30 | + sycl::buffer<DataType, 1> BufA(DataA, DEFAULT_PROBLEM_SIZE); |
| 31 | + sycl::buffer<DataType, 1> BufB(DataB, DEFAULT_PROBLEM_SIZE); |
| 32 | + sycl::buffer<DataType, 1> BufC(DataC, DEFAULT_PROBLEM_SIZE); |
| 33 | + |
| 34 | + sycl::queue deviceQueue(sycl::gpu_selector{}, AsyncHandler); |
| 35 | + |
| 36 | + deviceQueue.submit([&](sycl::handler &cgh) { |
| 37 | + auto A = BufA.get_access<sycl::access::mode::read>(cgh); |
| 38 | + auto B = BufB.get_access<sycl::access::mode::read>(cgh); |
| 39 | + auto C = BufC.get_access<sycl::access::mode::write>(cgh); |
| 40 | + cgh.parallel_for<class FillBuffer>( |
| 41 | + sycl::range<1>{DEFAULT_PROBLEM_SIZE}, [= |
| 42 | + ](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { |
| 43 | +#if defined(__SYCL_DEVICE_ONLY__) |
| 44 | + asm volatile( |
| 45 | + ".decl P1 v_type=P num_elts=8\n" |
| 46 | + ".decl P2 v_type=P num_elts=8\n" |
| 47 | + ".decl temp v_type=G type=d num_elts=8 align=dword\n" |
| 48 | + "mov (M1, 8) %0(0, 0)<1> 0x0:d\n" |
| 49 | + "cmp.le (M1, 8) P1 %1(0,0)<1;1,0> 0x0:d\n" |
| 50 | + "(P1) goto (M1, 8) label0\n" |
| 51 | + "mov (M1, 8) temp(0,0)<1> 0x0:d\n" |
| 52 | + "label1:\n" |
| 53 | + "add (M1, 8) temp(0,0)<1> temp(0,0)<1;1,0> 0x1:w\n" |
| 54 | + "add (M1, 8) %0(0,0)<1> %0(0,0)<1;1,0> %2(0,0)<1;1,0>\n" |
| 55 | + "cmp.lt (M1, 8) P2 temp(0,0)<0;8,1> %1(0,0)<0;8,1>\n" |
| 56 | + "(P2) goto (M1, 8) label1\n" |
| 57 | + "label0:" |
| 58 | + : "+rw"(C[wiID]) |
| 59 | + : "rw"(A[wiID]), "rw"(B[wiID])); |
| 60 | +#else |
| 61 | + C[wiID] = 0; |
| 62 | + for (int i = 0; i < A[wiID]; ++i) { |
| 63 | + C[wiID] = C[wiID] + B[wiID]; |
| 64 | + } |
| 65 | +#endif |
| 66 | + }); |
| 67 | + }); |
| 68 | + } |
| 69 | + |
| 70 | + for (int i = 0; i < DEFAULT_PROBLEM_SIZE; i++) { |
| 71 | + if (DataC[i] != DataA[i] * DataB[i]) { |
| 72 | + std::cerr << "At index: " << i << ". "; |
| 73 | + std::cerr << DataC[i] << " != " << DataA[i] * DataB[i] << "\n"; |
| 74 | + return 1; |
| 75 | + } |
| 76 | + } |
| 77 | + |
| 78 | + return 0; |
| 79 | +} |
0 commit comments