|
7 | 7 | using namespace cl::sycl;
|
8 | 8 |
|
9 | 9 | template <typename T, bool B> class KName;
|
| 10 | +template <typename T, typename> class TName; |
10 | 11 |
|
11 | 12 | template <typename Name, bool IsSYCL2020, access::mode Mode, int AccDim = 1,
|
12 | 13 | typename T, class BinaryOperation, int Dims>
|
13 | 14 | int test(queue &Q, T Identity, T Init, BinaryOperation BOp,
|
14 | 15 | const range<Dims> &Range) {
|
15 | 16 | printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
|
16 | 17 |
|
17 |
| - // Skip the test for such big arrays now. |
18 |
| - constexpr size_t TwoGB = 2LL * 1024 * 1024 * 1024; |
19 |
| - if (Range.size() > TwoGB) |
| 18 | + // It is a known problem with passing data that is close to 4Gb in size |
| 19 | + // to device. Such data breaks the execution pretty badly. |
| 20 | + // Some of test cases calling this function try to verify the correctness |
| 21 | + // of reduction with the global range bigger than the maximal work-group size |
| 22 | + // for the device. Maximal WG size for device may be very big, e.g. it is |
| 23 | + // 67108864 for ACC emulator. Multiplying that by some factor |
| 24 | + // (to exceed max WG-Size) and multiplying it by the element size may exceed |
| 25 | + // the safe size of data passed to device. |
| 26 | + // Let's set it to 1 GB for now, and just skip the test if it exceeds 1Gb. |
| 27 | + constexpr size_t OneGB = 1LL * 1024 * 1024 * 1024; |
| 28 | + if (Range.size() * sizeof(T) > OneGB) { |
| 29 | + std::cout << " SKIPPED due to too big data size" << std::endl; |
20 | 30 | return 0;
|
| 31 | + } |
21 | 32 |
|
22 | 33 | buffer<T, Dims> InBuf(Range);
|
23 | 34 | buffer<T, 1> OutBuf(1);
|
@@ -55,3 +66,115 @@ int testBoth(queue &Q, T Identity, T Init, BinaryOperation BOp,
|
55 | 66 | return test<KName<Name, false>, false, Mode>(Q, Identity, Init, BOp, Range) +
|
56 | 67 | test<KName<Name, true>, true, Mode>(Q, Identity, Init, BOp, Range);
|
57 | 68 | }
|
| 69 | + |
| 70 | +template <typename Name, bool IsSYCL2020, access::mode Mode, typename T, |
| 71 | + class BinaryOperation, int Dims> |
| 72 | +int testUSM(queue &Q, T Identity, T Init, BinaryOperation BOp, |
| 73 | + const range<Dims> &Range, usm::alloc AllocType) { |
| 74 | + printTestLabel<T, BinaryOperation>(IsSYCL2020, Range); |
| 75 | + |
| 76 | + auto Dev = Q.get_device(); |
| 77 | + if (!Dev.has(getUSMAspect(AllocType))) { |
| 78 | + std::cout << " SKIPPED due to unsupported USM alloc type" << std::endl; |
| 79 | + return 0; |
| 80 | + } |
| 81 | + |
| 82 | + // It is a known problem with passing data that is close to 4Gb in size |
| 83 | + // to device. Such data breaks the execution pretty badly. |
| 84 | + // Some of test cases calling this function try to verify the correctness |
| 85 | + // of reduction with the global range bigger than the maximal work-group size |
| 86 | + // for the device. Maximal WG size for device may be very big, e.g. it is |
| 87 | + // 67108864 for ACC emulator. Multiplying that by some factor |
| 88 | + // (to exceed max WG-Size) and multiplying it by the element size may exceed |
| 89 | + // the safe size of data passed to device. |
| 90 | + // Let's set it to 1 GB for now, and just skip the test if it exceeds 1Gb. |
| 91 | + constexpr size_t OneGB = 1LL * 1024 * 1024 * 1024; |
| 92 | + if (Range.size() * sizeof(T) > OneGB) { |
| 93 | + std::cout << " SKIPPED due to too big data size" << std::endl; |
| 94 | + return 0; |
| 95 | + } |
| 96 | + |
| 97 | + T *ReduVarPtr = (T *)malloc(sizeof(T), Dev, Q.get_context(), AllocType); |
| 98 | + if (ReduVarPtr == nullptr) { |
| 99 | + std::cout << " SKIPPED due to unrelated reason: alloc returned nullptr" |
| 100 | + << std::endl; |
| 101 | + return 0; |
| 102 | + } |
| 103 | + if (AllocType == usm::alloc::device) { |
| 104 | + Q.submit([&](handler &CGH) { |
| 105 | + CGH.single_task<TName<Name, class InitKernel>>( |
| 106 | + [=]() { *ReduVarPtr = Init; }); |
| 107 | + }).wait(); |
| 108 | + } else { |
| 109 | + *ReduVarPtr = Init; |
| 110 | + } |
| 111 | + |
| 112 | + // Initialize. |
| 113 | + T CorrectOut; |
| 114 | + buffer<T, Dims> InBuf(Range); |
| 115 | + initInputData(InBuf, CorrectOut, Identity, BOp, Range); |
| 116 | + if constexpr (Mode == access::mode::read_write) |
| 117 | + CorrectOut = BOp(CorrectOut, Init); |
| 118 | + |
| 119 | + // Compute. |
| 120 | + Q.submit([&](handler &CGH) { |
| 121 | + auto In = InBuf.template get_access<access::mode::read>(CGH); |
| 122 | + auto Redu = createReduction<IsSYCL2020, Mode>(ReduVarPtr, Identity, BOp); |
| 123 | + CGH.parallel_for<TName<Name, class Test>>( |
| 124 | + Range, Redu, [=](id<Dims> Id, auto &Sum) { Sum.combine(In[Id]); }); |
| 125 | + }).wait(); |
| 126 | + |
| 127 | + // Check correctness. |
| 128 | + T ComputedOut; |
| 129 | + if (AllocType == usm::alloc::device) { |
| 130 | + buffer<T, 1> Buf(&ComputedOut, range<1>(1)); |
| 131 | + Q.submit([&](handler &CGH) { |
| 132 | + auto OutAcc = Buf.template get_access<access::mode::discard_write>(CGH); |
| 133 | + CGH.single_task<TName<Name, class Check>>( |
| 134 | + [=]() { OutAcc[0] = *ReduVarPtr; }); |
| 135 | + }).wait(); |
| 136 | + ComputedOut = (Buf.template get_access<access::mode::read>())[0]; |
| 137 | + } else { |
| 138 | + ComputedOut = *ReduVarPtr; |
| 139 | + } |
| 140 | + |
| 141 | + std::string AllocStr = |
| 142 | + "AllocMode=" + std::to_string(static_cast<int>(AllocType)); |
| 143 | + int Error = checkResults(Q, IsSYCL2020, BOp, Range, ComputedOut, CorrectOut, |
| 144 | + AllocStr); |
| 145 | + free(ReduVarPtr, Q.get_context()); |
| 146 | + return Error; |
| 147 | +} |
| 148 | + |
| 149 | +template <typename Name, access::mode Mode, typename T, class BinaryOperation, |
| 150 | + int Dims> |
| 151 | +int test2020USM(queue &Q, T Identity, T Init, BinaryOperation BOp, |
| 152 | + const range<Dims> &Range) { |
| 153 | + int NumErrors = 0; |
| 154 | + NumErrors += testUSM<TName<Name, class Shared2020>, true, Mode, T>( |
| 155 | + Q, Identity, Init, BOp, Range, usm::alloc::shared); |
| 156 | + NumErrors += testUSM<TName<Name, class Host2020>, true, Mode, T>( |
| 157 | + Q, Identity, Init, BOp, Range, usm::alloc::host); |
| 158 | + NumErrors += testUSM<TName<Name, class Device2020>, true, Mode, T>( |
| 159 | + Q, Identity, Init, BOp, Range, usm::alloc::device); |
| 160 | + return NumErrors; |
| 161 | +} |
| 162 | + |
| 163 | +template <typename Name, access::mode Mode, typename T, class BinaryOperation, |
| 164 | + int Dims> |
| 165 | +int testONEAPIUSM(queue &Q, T Identity, T Init, BinaryOperation BOp, |
| 166 | + const range<Dims> &Range) { |
| 167 | + int NumErrors = 0; |
| 168 | + if (Mode == access::mode::discard_write) { |
| 169 | + std::cerr << "Skipped an incorrect test case: ext::oneapi::reduction " |
| 170 | + << "does not support discard_write mode for USM variables."; |
| 171 | + return 0; |
| 172 | + } |
| 173 | + NumErrors += testUSM<TName<Name, class Shared>, false, Mode, T>( |
| 174 | + Q, Identity, Init, BOp, Range, usm::alloc::shared); |
| 175 | + NumErrors += testUSM<TName<Name, class Host>, false, Mode, T>( |
| 176 | + Q, Identity, Init, BOp, Range, usm::alloc::host); |
| 177 | + NumErrors += testUSM<TName<Name, class Device>, false, Mode, T>( |
| 178 | + Q, Identity, Init, BOp, Range, usm::alloc::device); |
| 179 | + return NumErrors; |
| 180 | +} |
0 commit comments