Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit e2d4330

Browse files
authored
[SYCL] [L0] Add test for per-thread per-queue immediate command lists. (#1642)
1 parent 7f29b50 commit e2d4330

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// REQUIRES: gpu, level_zero
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out
4+
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=CHECK-ONE-CMDLIST %s
5+
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2 ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=CHECK-PER-THREAD-CMDLIST %s
6+
7+
// The test checks that immediate commandlists are created per-thread.
8+
// One immediate commandlist is created for device init, the rest for the queue.
9+
10+
// CHECK-ONE-CMDLIST: zeCommandListCreateImmediate = 2
11+
// CHECK-PER-THREAD-CMDLIST: zeCommandListCreateImmediate = 4
12+
13+
#include <sycl/sycl.hpp>
14+
#include <thread>
15+
16+
using namespace sycl;
17+
18+
bool results[3];
19+
20+
bool run_sample_kernel(queue Queue, int n) {
21+
// Creating buffer of 4 ints to be used inside the kernel code
22+
buffer<cl_int, 1> Buffer(4);
23+
24+
// Size of index space for kernel
25+
range<1> NumOfWorkItems{Buffer.size()};
26+
27+
// Submitting command group(work) to queue
28+
Queue.submit([&](handler &cgh) {
29+
// Getting write only access to the buffer on a device
30+
accessor Accessor = {Buffer, cgh, write_only};
31+
// Executing kernel
32+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](id<1> WIid) {
33+
// Fill buffer with indexes
34+
Accessor[WIid] = (cl_int)WIid.get(0);
35+
});
36+
});
37+
38+
// Getting read only access to the buffer on the host.
39+
// Implicit barrier waiting for queue to complete the work.
40+
const host_accessor HostAccessor = {Buffer, read_only};
41+
42+
// Check the results
43+
bool MismatchFound = false;
44+
for (size_t I = 0; I < Buffer.size(); ++I) {
45+
if (HostAccessor[I] != I) {
46+
std::cout << "The result is incorrect for element: " << I
47+
<< " , expected: " << I << " , got: " << HostAccessor[I]
48+
<< std::endl;
49+
MismatchFound = true;
50+
}
51+
}
52+
53+
if (!MismatchFound) {
54+
std::cout << "The results are correct!" << std::endl;
55+
}
56+
57+
return MismatchFound;
58+
}
59+
60+
void run_sample(queue Queue, int n) {
61+
results[n] = false;
62+
for (int i = 0; i < 5; i++)
63+
results[n] |= run_sample_kernel(Queue, n);
64+
}
65+
66+
int main() {
67+
68+
// Creating SYCL queue
69+
queue Queue;
70+
71+
// Create one queue
72+
auto D = Queue.get_device();
73+
const char *devType = D.is_cpu() ? "CPU" : "GPU";
74+
std::string pluginName = D.get_platform().get_info<info::platform::name>();
75+
std::cout << "Running on device " << devType << " ("
76+
<< D.get_info<info::device::name>() << ") " << pluginName
77+
<< " plugin\n";
78+
79+
// Use queue in multiple threads
80+
std::thread T1(run_sample, Queue, 0);
81+
std::thread T2(run_sample, Queue, 1);
82+
std::thread T3(run_sample, Queue, 2);
83+
84+
T1.join();
85+
T2.join();
86+
T3.join();
87+
88+
return (results[0] || results[1] || results[2]) ? 1 : 0;
89+
}

0 commit comments

Comments
 (0)