// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// UNSUPPORTED: cuda || hip

// This test checks the scenario of using specialization constants with an
// 'array of arrays' type as well as a 'struct containing an array of arrays'
// type for vector convolution, as described in section 4.9.5 "Specialization
// constants" of the SYCL 2020 specification:
// https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_example_usage

#include <sycl/sycl.hpp>

#include <array>
#include <cmath>
#include <cstdlib>
#include <iostream>

using namespace sycl;

using coeff_t = std::array<std::array<float, 3>, 3>;

struct coeff_struct_t {
  std::array<std::array<float, 3>, 3> c;
};

coeff_t get_coefficients() {
  return {{{1.0, 2.0, 3.0}, {1.1, 2.1, 3.1}, {1.2, 2.2, 3.2}}};
}

coeff_struct_t get_coefficient_struct() {
  return {{{{1.0, 2.0, 3.0}, {1.1, 2.1, 3.1}, {1.2, 2.2, 3.2}}}};
}

constexpr specialization_id<coeff_t> coeff_id;

constexpr specialization_id<coeff_struct_t> coeff_struct_id;

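// Computes a 3x3 convolution of 'in' around 'item_id' with the given
// coefficient matrix, skipping neighbours that fall outside the input range.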
template <typename IN>
float calc_conv(const coeff_t &coeff, const IN &in, item<2> item_id) {
  float acc = 0;

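  // Walk the 3x3 neighbourhood. Item coordinates are unsigned, so for
  // i == -1 (or j == -1) at the border the index wraps around and is
  // rejected by the '>= get_range()' check below.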
  for (int i = -1; i <= 1; i++) {
    if (item_id[0] + i < 0 || item_id[0] + i >= in.get_range()[0])
      continue;
    for (int j = -1; j <= 1; j++) {
      if (item_id[1] + j < 0 || item_id[1] + j >= in.get_range()[1])
        continue;
      // The underlying JIT compiler can see all the values of the coefficient
      // array, since it originates from a specialization constant.
      acc += coeff[i + 1][j + 1] * in[item_id[0] + i][item_id[1] + j];
    }
  }
  return acc;
}

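// Submits the convolution kernel. The coefficients are injected as
// specialization constants on the command group handler; 'coeff_provider' is
// a callable that extracts them from the kernel_handler inside the kernel.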
template <typename KernelName, typename CP>
void do_conv(buffer<float, 2> in, buffer<float, 2> out, CP coeff_provider) {
  queue myQueue;

  myQueue.submit([&](handler &cgh) {
    auto in_acc = in.template get_access<access::mode::read>(cgh);
    auto out_acc = out.template get_access<access::mode::write>(cgh);

    // Set the coefficients of the convolution as specialization constants.
    // This builds a specific kernel with the coefficients available as
    // literals.
    cgh.set_specialization_constant<coeff_id>(get_coefficients());
    cgh.set_specialization_constant<coeff_struct_id>(get_coefficient_struct());
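    // The kernel_handler argument lets the kernel read the specialization
    // constant values that were set above.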
    cgh.parallel_for<KernelName>(
        in.get_range(), [=](item<2> item_id, kernel_handler h) {
          auto coeff = coeff_provider(h);
          out_acc[item_id] = calc_conv(coeff, in_acc, item_id);
        });
  });

  myQueue.wait();
}

constexpr size_t N = 5;
constexpr size_t M = 4;

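// Reference results of convolving the ramp input initialized in main() with
// the coefficient matrix above.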
constexpr std::array<std::array<float, M>, N> expected = {
    {{17.1, 30.1, 43.0, 24.3},
     {41.3, 63.9, 82.8, 45.5},
     {72.5, 101.7, 120.6, 64.7},
     {103.7, 139.5, 158.4, 83.9},
     {77.7, 102.7, 115.0, 60.1}}};

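// Compares each element of the computed result against the expected value
// with a small tolerance and aborts the test on the first mismatch.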
template <typename Result, typename Expected>
void compare_result(const Result &result, const Expected &expected) {
  for (size_t i = 0; i < N; i++) {
    for (size_t j = 0; j < M; j++) {
      if (std::abs(result[i][j] - expected[i][j]) > 0.1) {
        std::cout << "Wrong value " << result[i][j] << " on element " << i
                  << ", " << j << std::endl;
        std::exit(-1);
      }
    }
  }
}

int main() {

  buffer<float, 2> input{range<2>{N, M}};
  buffer<float, 2> output{range<2>{N, M}};

  // Launch an asynchronous kernel to initialize input
  queue myQueue;
  myQueue.submit([&](handler &cgh) {
    accessor input_acc{input, cgh, write_only};

    cgh.parallel_for(input.get_range(), [=](id<2> index) {
      input_acc[index] = index[0] * 2 + index[1];
    });
  });

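  // Run the convolution reading the coefficients from the plain
  // array-of-arrays specialization constant.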
  do_conv<class Convolution1>(input, output, [](kernel_handler &h) {
    return h.get_specialization_constant<coeff_id>();
  });

  compare_result(host_accessor{output, read_only}, expected);

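  // Repeat the convolution, this time reading the coefficients from the
  // struct-wrapped specialization constant.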
  do_conv<class Convolution2>(input, output, [](kernel_handler &h) {
    return h.get_specialization_constant<coeff_struct_id>().c;
  });

  compare_result(host_accessor{output, read_only}, expected);

  std::cout << "Good computation!" << std::endl;
  return 0;
}