|
| 1 | +//===-- test_sycl_queue_submit.cpp - Test cases for kernel submission fns. ===// |
| 2 | +// |
| 3 | +// Data Parallel Control (dpctl) |
| 4 | +// |
| 5 | +// Copyright 2020-2024 Intel Corporation |
| 6 | +// |
| 7 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 8 | +// you may not use this file except in compliance with the License. |
| 9 | +// You may obtain a copy of the License at |
| 10 | +// |
| 11 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | +// |
| 13 | +// Unless required by applicable law or agreed to in writing, software |
| 14 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 15 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 16 | +// See the License for the specific language governing permissions and |
| 17 | +// limitations under the License. |
| 18 | +// |
| 19 | +//===----------------------------------------------------------------------===// |
| 20 | +/// |
| 21 | +/// \file |
| 22 | +/// This file has unit test cases for the various submit functions defined |
| 23 | +/// inside dpctl_sycl_queue_interface.cpp. |
| 24 | +//===----------------------------------------------------------------------===// |
| 25 | + |
| 26 | +#include "dpctl_sycl_context_interface.h" |
| 27 | +#include "dpctl_sycl_device_interface.h" |
| 28 | +#include "dpctl_sycl_device_selector_interface.h" |
| 29 | +#include "dpctl_sycl_event_interface.h" |
| 30 | +#include "dpctl_sycl_kernel_bundle_interface.h" |
| 31 | +#include "dpctl_sycl_kernel_interface.h" |
| 32 | +#include "dpctl_sycl_queue_interface.h" |
| 33 | +#include "dpctl_sycl_type_casters.hpp" |
| 34 | +#include "dpctl_sycl_usm_interface.h" |
| 35 | +#include <filesystem> |
| 36 | +#include <fstream> |
| 37 | +#include <gtest/gtest.h> |
| 38 | +#include <sycl/sycl.hpp> |
| 39 | +#include <utility> |
| 40 | + |
| 41 | +namespace |
| 42 | +{ |
| 43 | +constexpr size_t SIZE = 100; |
| 44 | + |
| 45 | +using namespace dpctl::syclinterface; |
| 46 | + |
| 47 | +typedef struct MDLocalAccessorTy |
| 48 | +{ |
| 49 | + size_t ndim; |
| 50 | + DPCTLKernelArgType dpctl_type_id; |
| 51 | + size_t dim0; |
| 52 | + size_t dim1; |
| 53 | + size_t dim2; |
| 54 | +} MDLocalAccessor; |
| 55 | + |
| 56 | +template <typename T> |
| 57 | +void submit_kernel(DPCTLSyclQueueRef QRef, |
| 58 | + DPCTLSyclKernelBundleRef KBRef, |
| 59 | + std::vector<char> spirvBuffer, |
| 60 | + size_t spirvFileSize, |
| 61 | + DPCTLKernelArgType kernelArgTy, |
| 62 | + std::string kernelName) |
| 63 | +{ |
| 64 | + constexpr size_t NARGS = 2; |
| 65 | + constexpr size_t RANGE_NDIMS = 1; |
| 66 | + |
| 67 | + ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); |
| 68 | + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); |
| 69 | + |
| 70 | + // Create the input args |
| 71 | + auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); |
| 72 | + ASSERT_TRUE(a != nullptr); |
| 73 | + auto a_ptr = static_cast<T *>(unwrap<void>(a)); |
| 74 | + for (auto i = 0ul; i < SIZE; ++i) { |
| 75 | + a_ptr[i] = 0; |
| 76 | + } |
| 77 | + |
| 78 | + auto la = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1}; |
| 79 | + |
| 80 | + // Create kernel args for vector_add |
| 81 | + size_t gRange[] = {SIZE}; |
| 82 | + size_t lRange[] = {SIZE / 10}; |
| 83 | + void *args[NARGS] = {unwrap<void>(a), (void *)&la}; |
| 84 | + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, |
| 85 | + DPCTL_LOCAL_ACCESSOR}; |
| 86 | + |
| 87 | + auto ERef = |
| 88 | + DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, |
| 89 | + gRange, lRange, RANGE_NDIMS, nullptr, 0); |
| 90 | + ASSERT_TRUE(ERef != nullptr); |
| 91 | + DPCTLQueue_Wait(QRef); |
| 92 | + |
| 93 | + if (kernelArgTy != DPCTL_FLOAT32_T && kernelArgTy != DPCTL_FLOAT64_T) |
| 94 | + ASSERT_TRUE(a_ptr[0] == 20); |
| 95 | + else |
| 96 | + ASSERT_TRUE(a_ptr[0] == 20.0); |
| 97 | + |
| 98 | + // clean ups |
| 99 | + DPCTLEvent_Delete(ERef); |
| 100 | + DPCTLKernel_Delete(kernel); |
| 101 | + DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); |
| 102 | +} |
| 103 | + |
| 104 | +} /* end of anonymous namespace */ |
| 105 | + |
| 106 | +/* |
| 107 | +// The local_accessor_kernel spv files were generated from the SYCL program |
| 108 | +// included in this comment. The program can be compiled using |
| 109 | +// `icpx -fsycl local_accessor_kernel.cpp`. After that if the generated |
| 110 | +// executable is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx |
| 111 | +// runtime will dump all offload sections of fat binary to the current working |
| 112 | +// directory. When tested with DPC++ 2024.0 the kernels are split across two |
| 113 | +// separate SPV files. One contains all kernels for integers and FP32 |
| 114 | +// data type, and another contains the kernel for FP64. |
| 115 | +// |
| 116 | +// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that |
| 117 | +// contain the code for built in functions such as indexing and barriers. To |
| 118 | +// figure which SPV file contains the kernels, use `spirv-dis` from the |
| 119 | +// spirv-tools package to translate the SPV binary format to a human-readable |
| 120 | +// textual format. |
| 121 | +#include <CL/sycl.hpp> |
| 122 | +#include <iostream> |
| 123 | +#include <sstream> |
| 124 | +
|
| 125 | +template <typename T> |
| 126 | +class SyclKernel_SLM |
| 127 | +{ |
| 128 | +private: |
| 129 | + T N_; |
| 130 | + T *a_ = nullptr; |
| 131 | + sycl::local_accessor<T, 1> slm_; |
| 132 | +
|
| 133 | +public: |
| 134 | + SyclKernel_SLM(T *a, sycl::local_accessor<T, 1> slm) |
| 135 | + : a_(a), slm_(slm) |
| 136 | + { |
| 137 | + } |
| 138 | +
|
| 139 | + void operator()(sycl::nd_item<1> it) const |
| 140 | + { |
| 141 | + int i = it.get_global_id(); |
| 142 | + int j = it.get_local_id(); |
| 143 | + slm_[j] = 2; |
| 144 | + auto g = it.get_group(); |
| 145 | + group_barrier(g); |
| 146 | + auto temp = 0; |
| 147 | + for (auto idx = 0ul; idx < it.get_local_range(0); ++idx) |
| 148 | + temp += slm_[idx]; |
| 149 | + a_[i] = temp * (i + 1); |
| 150 | + } |
| 151 | +}; |
| 152 | +
|
| 153 | +template <typename T> |
| 154 | +void submit_kernel(sycl::queue q, const unsigned long N, T *a) |
| 155 | +{ |
| 156 | + q.submit([&](auto &h) |
| 157 | + { |
| 158 | + sycl::local_accessor<T, 1> slm(sycl::range(N/10), h); |
| 159 | + h.parallel_for(sycl::nd_range(sycl::range{N}, sycl::range{N/10}), |
| 160 | + SyclKernel_SLM<T>(a, slm)); }); |
| 161 | +} |
| 162 | +
|
| 163 | +template <typename T> |
| 164 | +void driver(size_t N) |
| 165 | +{ |
| 166 | + sycl::queue q; |
| 167 | + auto *a = sycl::malloc_shared<T>(N, q); |
| 168 | + submit_kernel(q, N, a); |
| 169 | + q.wait(); |
| 170 | + sycl::free(a, q); |
| 171 | +} |
| 172 | +
|
| 173 | +int main(int argc, const char **argv) |
| 174 | +{ |
| 175 | + size_t N = 0; |
| 176 | + std::cout << "Enter problem size in N:\n"; |
| 177 | + std::cin >> N; |
| 178 | + std::cout << "Executing with N = " << N << std::endl; |
| 179 | +
|
| 180 | + driver<int8_t>(N); |
| 181 | + driver<uint8_t>(N); |
| 182 | + driver<int16_t>(N); |
| 183 | + driver<int32_t>(N); |
| 184 | + driver<int32_t>(N); |
| 185 | + driver<uint32_t>(N); |
| 186 | + driver<int64_t>(N); |
| 187 | + driver<uint64_t>(N); |
| 188 | + driver<float>(N); |
| 189 | + driver<double>(N); |
| 190 | +
|
| 191 | + return 0; |
| 192 | +} |
| 193 | +
|
| 194 | +*/ |
| 195 | + |
| 196 | +struct TestQueueSubmitWithLocalAccessor : public ::testing::Test |
| 197 | +{ |
| 198 | + std::ifstream spirvFile; |
| 199 | + size_t spirvFileSize_; |
| 200 | + std::vector<char> spirvBuffer_; |
| 201 | + DPCTLSyclQueueRef QRef = nullptr; |
| 202 | + DPCTLSyclKernelBundleRef KBRef = nullptr; |
| 203 | + |
| 204 | + TestQueueSubmitWithLocalAccessor() |
| 205 | + { |
| 206 | + DPCTLSyclDeviceSelectorRef DSRef = nullptr; |
| 207 | + DPCTLSyclDeviceRef DRef = nullptr; |
| 208 | + |
| 209 | + spirvFile.open("./local_accessor_kernel_inttys_fp32.spv", |
| 210 | + std::ios::binary | std::ios::ate); |
| 211 | + spirvFileSize_ = std::filesystem::file_size( |
| 212 | + "./local_accessor_kernel_inttys_fp32.spv"); |
| 213 | + spirvBuffer_.reserve(spirvFileSize_); |
| 214 | + spirvFile.seekg(0, std::ios::beg); |
| 215 | + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); |
| 216 | + |
| 217 | + DSRef = DPCTLDefaultSelector_Create(); |
| 218 | + DRef = DPCTLDevice_CreateFromSelector(DSRef); |
| 219 | + QRef = |
| 220 | + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); |
| 221 | + auto CRef = DPCTLQueue_GetContext(QRef); |
| 222 | + |
| 223 | + KBRef = DPCTLKernelBundle_CreateFromSpirv( |
| 224 | + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); |
| 225 | + DPCTLDevice_Delete(DRef); |
| 226 | + DPCTLDeviceSelector_Delete(DSRef); |
| 227 | + } |
| 228 | + |
| 229 | + ~TestQueueSubmitWithLocalAccessor() |
| 230 | + { |
| 231 | + spirvFile.close(); |
| 232 | + DPCTLQueue_Delete(QRef); |
| 233 | + DPCTLKernelBundle_Delete(KBRef); |
| 234 | + } |
| 235 | +}; |
| 236 | + |
| 237 | +struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test |
| 238 | +{ |
| 239 | + std::ifstream spirvFile; |
| 240 | + size_t spirvFileSize_; |
| 241 | + std::vector<char> spirvBuffer_; |
| 242 | + DPCTLSyclQueueRef QRef = nullptr; |
| 243 | + DPCTLSyclKernelBundleRef KBRef = nullptr; |
| 244 | + |
| 245 | + TestQueueSubmitWithLocalAccessorFP64() |
| 246 | + { |
| 247 | + DPCTLSyclDeviceSelectorRef DSRef = nullptr; |
| 248 | + DPCTLSyclDeviceRef DRef = nullptr; |
| 249 | + |
| 250 | + spirvFile.open("./local_accessor_kernel_fp64.spv", |
| 251 | + std::ios::binary | std::ios::ate); |
| 252 | + spirvFileSize_ = |
| 253 | + std::filesystem::file_size("./local_accessor_kernel_fp64.spv"); |
| 254 | + spirvBuffer_.reserve(spirvFileSize_); |
| 255 | + spirvFile.seekg(0, std::ios::beg); |
| 256 | + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); |
| 257 | + DSRef = DPCTLDefaultSelector_Create(); |
| 258 | + DRef = DPCTLDevice_CreateFromSelector(DSRef); |
| 259 | + QRef = |
| 260 | + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); |
| 261 | + auto CRef = DPCTLQueue_GetContext(QRef); |
| 262 | + |
| 263 | + KBRef = DPCTLKernelBundle_CreateFromSpirv( |
| 264 | + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); |
| 265 | + DPCTLDevice_Delete(DRef); |
| 266 | + DPCTLDeviceSelector_Delete(DSRef); |
| 267 | + } |
| 268 | + |
| 269 | + ~TestQueueSubmitWithLocalAccessorFP64() |
| 270 | + { |
| 271 | + spirvFile.close(); |
| 272 | + DPCTLQueue_Delete(QRef); |
| 273 | + DPCTLKernelBundle_Delete(KBRef); |
| 274 | + } |
| 275 | +}; |
| 276 | + |
| 277 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt8) |
| 278 | +{ |
| 279 | + submit_kernel<int8_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 280 | + DPCTLKernelArgType::DPCTL_INT8_T, |
| 281 | + "_ZTS14SyclKernel_SLMIaE"); |
| 282 | +} |
| 283 | + |
| 284 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt8) |
| 285 | +{ |
| 286 | + submit_kernel<uint8_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 287 | + DPCTLKernelArgType::DPCTL_UINT8_T, |
| 288 | + "_ZTS14SyclKernel_SLMIhE"); |
| 289 | +} |
| 290 | + |
| 291 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt16) |
| 292 | +{ |
| 293 | + submit_kernel<int16_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 294 | + DPCTLKernelArgType::DPCTL_INT16_T, |
| 295 | + "_ZTS14SyclKernel_SLMIsE"); |
| 296 | +} |
| 297 | + |
| 298 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt16) |
| 299 | +{ |
| 300 | + submit_kernel<uint16_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 301 | + DPCTLKernelArgType::DPCTL_UINT16_T, |
| 302 | + "_ZTS14SyclKernel_SLMItE"); |
| 303 | +} |
| 304 | + |
| 305 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt32) |
| 306 | +{ |
| 307 | + submit_kernel<int32_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 308 | + DPCTLKernelArgType::DPCTL_INT32_T, |
| 309 | + "_ZTS14SyclKernel_SLMIiE"); |
| 310 | +} |
| 311 | + |
| 312 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt32) |
| 313 | +{ |
| 314 | + submit_kernel<uint32_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 315 | + DPCTLKernelArgType::DPCTL_UINT32_T, |
| 316 | + "_ZTS14SyclKernel_SLMIjE"); |
| 317 | +} |
| 318 | + |
| 319 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt64) |
| 320 | +{ |
| 321 | + submit_kernel<int64_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 322 | + DPCTLKernelArgType::DPCTL_INT64_T, |
| 323 | + "_ZTS14SyclKernel_SLMIlE"); |
| 324 | +} |
| 325 | + |
| 326 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt64) |
| 327 | +{ |
| 328 | + submit_kernel<uint64_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 329 | + DPCTLKernelArgType::DPCTL_UINT64_T, |
| 330 | + "_ZTS14SyclKernel_SLMImE"); |
| 331 | +} |
| 332 | + |
| 333 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForFloat) |
| 334 | +{ |
| 335 | + submit_kernel<float>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 336 | + DPCTLKernelArgType::DPCTL_FLOAT32_T, |
| 337 | + "_ZTS14SyclKernel_SLMIfE"); |
| 338 | +} |
| 339 | + |
| 340 | +TEST_F(TestQueueSubmitWithLocalAccessorFP64, CheckForDouble) |
| 341 | +{ |
| 342 | + submit_kernel<double>(QRef, KBRef, spirvBuffer_, spirvFileSize_, |
| 343 | + DPCTLKernelArgType::DPCTL_FLOAT64_T, |
| 344 | + "_ZTS14SyclKernel_SLMIdE"); |
| 345 | +} |
| 346 | + |
| 347 | +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUnsupportedArgTy) |
| 348 | +{ |
| 349 | + size_t gRange[] = {SIZE}; |
| 350 | + size_t lRange[] = {SIZE / 10}; |
| 351 | + size_t RANGE_NDIMS = 1; |
| 352 | + constexpr size_t NARGS = 2; |
| 353 | + |
| 354 | + auto la = MDLocalAccessor{1, DPCTL_UNSUPPORTED_KERNEL_ARG, SIZE / 10, 1, 1}; |
| 355 | + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, "_ZTS14SyclKernel_SLMImE"); |
| 356 | + void *args[NARGS] = {unwrap<void>(nullptr), (void *)&la}; |
| 357 | + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, |
| 358 | + DPCTL_LOCAL_ACCESSOR}; |
| 359 | + auto ERef = |
| 360 | + DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, |
| 361 | + gRange, lRange, RANGE_NDIMS, nullptr, 0); |
| 362 | + |
| 363 | + ASSERT_TRUE(ERef == nullptr); |
| 364 | +} |
0 commit comments