|
| 1 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out |
| 2 | +// RUN: %CPU_RUN_PLACEHOLDER %t.out |
| 3 | +// RUN: %GPU_RUN_PLACEHOLDER %t.out |
| 4 | +// RUN: %ACC_RUN_PLACEHOLDER %t.out |
| 5 | + |
| 6 | +#include <sycl/sycl.hpp> |
| 7 | +using namespace sycl; |
| 8 | + |
| 9 | +template <int Dims> auto get_global_range(range<Dims> Range) { return Range; } |
| 10 | +template <int Dims> auto get_global_range(nd_range<Dims> NDRange) { |
| 11 | + return NDRange.get_global_range(); |
| 12 | +} |
| 13 | + |
| 14 | +template <int Dims, bool WithOffset> |
| 15 | +auto get_global_id(item<Dims, WithOffset> Item) { |
| 16 | + return Item.get_id(); |
| 17 | +} |
| 18 | +template <int Dims> auto get_global_id(nd_item<Dims> NDItem) { |
| 19 | + return NDItem.get_global_id(); |
| 20 | +} |
| 21 | + |
| 22 | +template <int Dims> auto get_global_id(id<Dims> Id) { return Id; } |
| 23 | + |
| 24 | +// We can select strategy explicitly so no need to test all combinations of |
| 25 | +// types/operations. |
| 26 | +using T = int; |
| 27 | +using BinOpTy = std::plus<T>; |
| 28 | + |
| 29 | +// On Windows, allocating new memory and then initializing it is slow for some |
| 30 | +// reason (not related to reductions). Try to re-use the same memory between |
| 31 | +// test cases. |
| 32 | +struct RedStorage { |
| 33 | + RedStorage(queue &q) : q(q), Ptr(malloc_device<T>(1, q)), Buf(1) {} |
| 34 | + ~RedStorage() { free(Ptr, q); } |
| 35 | + |
| 36 | + template <bool UseUSM> auto get() { |
| 37 | + if constexpr (UseUSM) |
| 38 | + return Ptr; |
| 39 | + else |
| 40 | + return Buf; |
| 41 | + } |
| 42 | + queue &q; |
| 43 | + T *Ptr; |
| 44 | + buffer<T, 1> Buf; |
| 45 | +}; |
| 46 | + |
| 47 | +template <bool UseUSM, bool InitToIdentity, |
| 48 | + detail::reduction::strategy Strategy, typename RangeTy> |
| 49 | +static void test(RedStorage &Storage, RangeTy Range) { |
| 50 | + queue &q = Storage.q; |
| 51 | + |
| 52 | + T Init{19}; |
| 53 | + |
| 54 | + auto Red = Storage.get<UseUSM>(); |
| 55 | + auto GetRedAcc = [&](handler &cgh) { |
| 56 | + if constexpr (UseUSM) |
| 57 | + return Red; |
| 58 | + else |
| 59 | + return accessor{Red, cgh}; |
| 60 | + }; |
| 61 | + |
| 62 | + q.submit([&](handler &cgh) { |
| 63 | + auto RedAcc = GetRedAcc(cgh); |
| 64 | + cgh.single_task([=]() { RedAcc[0] = Init; }); |
| 65 | + }).wait(); |
| 66 | + |
| 67 | + q.submit([&](handler &cgh) { |
| 68 | + auto RedSycl = [&]() { |
| 69 | + if constexpr (UseUSM) |
| 70 | + if constexpr (InitToIdentity) |
| 71 | + return reduction(Red, BinOpTy{}, |
| 72 | + property::reduction::initialize_to_identity{}); |
| 73 | + else |
| 74 | + return reduction(Red, BinOpTy{}); |
| 75 | + else if constexpr (InitToIdentity) |
| 76 | + return reduction(Red, cgh, BinOpTy{}, |
| 77 | + property::reduction::initialize_to_identity{}); |
| 78 | + else |
| 79 | + return reduction(Red, cgh, BinOpTy{}); |
| 80 | + }(); |
| 81 | + detail::reduction_parallel_for<detail::auto_name, Strategy>( |
| 82 | + cgh, Range, ext::oneapi::experimental::detail::empty_properties_t{}, |
| 83 | + RedSycl, [=](auto Item, auto &Red) { Red.combine(T{1}); }); |
| 84 | + }).wait(); |
| 85 | + |
| 86 | + auto *Result = malloc_shared<T>(1, q); |
| 87 | + q.submit([&](handler &cgh) { |
| 88 | + auto RedAcc = GetRedAcc(cgh); |
| 89 | + cgh.single_task([=]() { *Result = RedAcc[0]; }); |
| 90 | + }).wait(); |
| 91 | + |
| 92 | + auto N = get_global_range(Range).size(); |
| 93 | + int Expected = InitToIdentity ? N : Init + N; |
| 94 | +#if defined(__PRETTY_FUNCTION__) |
| 95 | + std::cout << __PRETTY_FUNCTION__; |
| 96 | +#elif defined(__FUNCSIG__) |
| 97 | + std::cout << __FUNCSIG__; |
| 98 | +#endif |
| 99 | + std::cout << ": " << *Result << ", expected " << Expected << std::endl; |
| 100 | + assert(*Result == Expected); |
| 101 | + |
| 102 | + free(Result, q); |
| 103 | +} |
| 104 | + |
| 105 | +template <int... Inds, class F> |
| 106 | +void loop_impl(std::integer_sequence<int, Inds...>, F &&f) { |
| 107 | + (f(std::integral_constant<int, Inds>{}), ...); |
| 108 | +} |
| 109 | + |
| 110 | +template <int count, class F> void loop(F &&f) { |
| 111 | + loop_impl(std::make_integer_sequence<int, count>{}, std::forward<F>(f)); |
| 112 | +} |
| 113 | + |
| 114 | +template <bool UseUSM, bool InitToIdentity, typename RangeTy> |
| 115 | +void testAllStrategies(RedStorage &Storage, RangeTy Range) { |
| 116 | + loop<(int)detail::reduction::strategy::multi>([&](auto Id) { |
| 117 | + constexpr auto Strategy = |
| 118 | + // Skip auto_select == 0. |
| 119 | + detail::reduction::strategy{decltype(Id)::value + 1}; |
| 120 | + test<UseUSM, InitToIdentity, Strategy>(Storage, Range); |
| 121 | + }); |
| 122 | +} |
| 123 | + |
| 124 | +int main() { |
| 125 | + queue q; |
| 126 | + RedStorage Storage(q); |
| 127 | + |
| 128 | + auto TestRange = [&](auto Range) { |
| 129 | + testAllStrategies<true, true>(Storage, Range); |
| 130 | + testAllStrategies<true, false>(Storage, Range); |
| 131 | + testAllStrategies<false, true>(Storage, Range); |
| 132 | + testAllStrategies<false, false>(Storage, Range); |
| 133 | + }; |
| 134 | + |
| 135 | + TestRange(range<1>{42}); |
| 136 | + TestRange(range<2>{8, 8}); |
| 137 | + TestRange(range<3>{7, 7, 5}); |
| 138 | + TestRange(nd_range<1>{range<1>{7}, range<1>{7}}); |
| 139 | + TestRange(nd_range<1>{range<1>{3 * 3}, range<1>{3}}); |
| 140 | + |
| 141 | + // TODO: Strategies historically adopted from sycl::range implementation only |
| 142 | + // support 1-Dim case. |
| 143 | + // |
| 144 | + // TestRange(nd_range<2>{range<2>{7, 3}, range<2> {7, 3}}); |
| 145 | + // TestRange(nd_range<2>{range<2>{14, 9}, range<2> {7, 3}}); |
| 146 | + return 0; |
| 147 | +} |
0 commit comments