|
| 1 | +// REQUIRES: gpu, level_zero |
| 2 | + |
| 3 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out |
| 4 | +// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=CHECK-ONE-CMDLIST %s |
| 5 | +// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2 ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=CHECK-PER-THREAD-CMDLIST %s |
| 6 | + |
| 7 | +// The test checks that immediate commandlists are created per-thread. |
| 8 | +// One immediate commandlist is created for device init, the rest for the queue. |
| 9 | + |
| 10 | +// CHECK-ONE-CMDLIST: zeCommandListCreateImmediate = 2 |
| 11 | +// CHECK-PER-THREAD-CMDLIST: zeCommandListCreateImmediate = 4 |
| 12 | + |
| 13 | +#include <sycl/sycl.hpp> |
| 14 | +#include <thread> |
| 15 | + |
| 16 | +using namespace sycl; |
| 17 | + |
| 18 | +bool results[3]; |
| 19 | + |
| 20 | +bool run_sample_kernel(queue Queue, int n) { |
| 21 | + // Creating buffer of 4 ints to be used inside the kernel code |
| 22 | + buffer<cl_int, 1> Buffer(4); |
| 23 | + |
| 24 | + // Size of index space for kernel |
| 25 | + range<1> NumOfWorkItems{Buffer.size()}; |
| 26 | + |
| 27 | + // Submitting command group(work) to queue |
| 28 | + Queue.submit([&](handler &cgh) { |
| 29 | + // Getting write only access to the buffer on a device |
| 30 | + accessor Accessor = {Buffer, cgh, write_only}; |
| 31 | + // Executing kernel |
| 32 | + cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](id<1> WIid) { |
| 33 | + // Fill buffer with indexes |
| 34 | + Accessor[WIid] = (cl_int)WIid.get(0); |
| 35 | + }); |
| 36 | + }); |
| 37 | + |
| 38 | + // Getting read only access to the buffer on the host. |
| 39 | + // Implicit barrier waiting for queue to complete the work. |
| 40 | + const host_accessor HostAccessor = {Buffer, read_only}; |
| 41 | + |
| 42 | + // Check the results |
| 43 | + bool MismatchFound = false; |
| 44 | + for (size_t I = 0; I < Buffer.size(); ++I) { |
| 45 | + if (HostAccessor[I] != I) { |
| 46 | + std::cout << "The result is incorrect for element: " << I |
| 47 | + << " , expected: " << I << " , got: " << HostAccessor[I] |
| 48 | + << std::endl; |
| 49 | + MismatchFound = true; |
| 50 | + } |
| 51 | + } |
| 52 | + |
| 53 | + if (!MismatchFound) { |
| 54 | + std::cout << "The results are correct!" << std::endl; |
| 55 | + } |
| 56 | + |
| 57 | + return MismatchFound; |
| 58 | +} |
| 59 | + |
| 60 | +void run_sample(queue Queue, int n) { |
| 61 | + results[n] = false; |
| 62 | + for (int i = 0; i < 5; i++) |
| 63 | + results[n] |= run_sample_kernel(Queue, n); |
| 64 | +} |
| 65 | + |
| 66 | +int main() { |
| 67 | + |
| 68 | + // Creating SYCL queue |
| 69 | + queue Queue; |
| 70 | + |
| 71 | + // Create one queue |
| 72 | + auto D = Queue.get_device(); |
| 73 | + const char *devType = D.is_cpu() ? "CPU" : "GPU"; |
| 74 | + std::string pluginName = D.get_platform().get_info<info::platform::name>(); |
| 75 | + std::cout << "Running on device " << devType << " (" |
| 76 | + << D.get_info<info::device::name>() << ") " << pluginName |
| 77 | + << " plugin\n"; |
| 78 | + |
| 79 | + // Use queue in multiple threads |
| 80 | + std::thread T1(run_sample, Queue, 0); |
| 81 | + std::thread T2(run_sample, Queue, 1); |
| 82 | + std::thread T3(run_sample, Queue, 2); |
| 83 | + |
| 84 | + T1.join(); |
| 85 | + T2.join(); |
| 86 | + T3.join(); |
| 87 | + |
| 88 | + return (results[0] || results[1] || results[2]) ? 1 : 0; |
| 89 | +} |
0 commit comments