Skip to content

Commit aaf0afd

Browse files
authored
[SYCL][XPTI] Buffer events (intel/llvm-test-suite#681)
The following test cases are covered: - the buffer without associated host memory; - the buffer with associated host memory; - the buffer with use_host_ptr property; - two buffers used on the same device; - the buffer used on different devices; - the buffer used in recurrent calls; - the buffer created inside a cycle.
1 parent c7f245d commit aaf0afd

File tree

8 files changed

+486
-0
lines changed

8 files changed

+486
-0
lines changed

SYCL/XPTI/Inputs/buffer_info_collector.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ XPTI_CALLBACK_API void memCallback(uint16_t, xpti::trace_event_data_t *,
1010
xpti::trace_event_data_t *, uint64_t,
1111
const void *);
1212

13+
XPTI_CALLBACK_API void syclBufferCallback(uint16_t, xpti::trace_event_data_t *,
14+
xpti::trace_event_data_t *, uint64_t,
15+
const void *);
16+
1317
XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
1418
unsigned int MinorVersion,
1519
const char *VersionStr,
@@ -36,6 +40,26 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
3640
static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end),
3741
memCallback);
3842
}
43+
44+
if (NameView == "sycl.experimental.buffer") {
45+
uint8_t StreamID = xptiRegisterStream(StreamName);
46+
xptiRegisterCallback(StreamID,
47+
static_cast<uint16_t>(
48+
xpti::trace_point_type_t::offload_alloc_construct),
49+
syclBufferCallback);
50+
xptiRegisterCallback(StreamID,
51+
static_cast<uint16_t>(
52+
xpti::trace_point_type_t::offload_alloc_associate),
53+
syclBufferCallback);
54+
xptiRegisterCallback(
55+
StreamID,
56+
static_cast<uint16_t>(xpti::trace_point_type_t::offload_alloc_release),
57+
syclBufferCallback);
58+
xptiRegisterCallback(
59+
StreamID,
60+
static_cast<uint16_t>(xpti::trace_point_type_t::offload_alloc_destruct),
61+
syclBufferCallback);
62+
}
3963
}
4064

4165
XPTI_CALLBACK_API void xptiTraceFinish(const char *streamName) {
@@ -62,3 +86,43 @@ XPTI_CALLBACK_API void memCallback(uint16_t TraceType,
6286
std::cout << " alloc_pointer : " << Data->alloc_pointer << "\n";
6387
std::cout << " alloc_size : " << Data->alloc_size << "\n";
6488
}
89+
90+
XPTI_CALLBACK_API void syclBufferCallback(uint16_t TraceType,
91+
xpti::trace_event_data_t *Parent,
92+
xpti::trace_event_data_t *Event,
93+
uint64_t IId, const void *UserData) {
94+
std::lock_guard Lock{GMutex};
95+
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
96+
switch (Type) {
97+
case xpti::trace_point_type_t::offload_alloc_construct: {
98+
auto BufConstr = (xpti::offload_buffer_data_t *)UserData;
99+
std::cout << IId << "|Create buffer|" << BufConstr->user_object_handle
100+
<< "|" << Event->reserved.payload->name << "|"
101+
<< Event->reserved.payload->source_file << ":"
102+
<< Event->reserved.payload->line_no << ":"
103+
<< Event->reserved.payload->column_no << "\n";
104+
105+
break;
106+
}
107+
case xpti::trace_point_type_t::offload_alloc_associate: {
108+
auto BufAssoc = (xpti::offload_buffer_association_data_t *)UserData;
109+
std::cout << IId << "|Associate buffer|" << BufAssoc->user_object_handle
110+
<< "|" << BufAssoc->mem_object_handle << std::endl;
111+
break;
112+
}
113+
case xpti::trace_point_type_t::offload_alloc_release: {
114+
auto BufRelease = (xpti::offload_buffer_association_data_t *)UserData;
115+
std::cout << IId << "|Release buffer|" << BufRelease->user_object_handle
116+
<< "|" << BufRelease->mem_object_handle << std::endl;
117+
break;
118+
}
119+
case xpti::trace_point_type_t::offload_alloc_destruct: {
120+
auto BufDestr = (xpti::offload_buffer_data_t *)UserData;
121+
std::cout << IId << "|Destruct buffer|" << BufDestr->user_object_handle
122+
<< "\n";
123+
break;
124+
}
125+
default:
126+
std::cout << "Unknown tracepoint\n";
127+
}
128+
}

SYCL/XPTI/buffer/host_array.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// REQUIRES: xptifw, opencl
2+
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
5+
6+
#ifdef XPTI_COLLECTOR
7+
8+
#include "../Inputs/buffer_info_collector.cpp"
9+
10+
#else
11+
12+
#include <sycl/sycl.hpp>
13+
14+
int main() {
15+
bool MismatchFound = false;
16+
sycl::queue Queue{};
17+
18+
int Array[4];
19+
{
20+
sycl::range<1> NumOfWorkItems{4};
21+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}host_array.cpp:22:26|{{.*}}host_array.cpp:22:26
22+
sycl::buffer<int, 1> Buffer1(Array, NumOfWorkItems);
23+
24+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
25+
Queue.submit([&](sycl::handler &cgh) {
26+
// Get write only access to the buffer on a device.
27+
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
28+
// Execute kernel.
29+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
30+
Accessor1[WIid] = static_cast<int>(WIid.get(0));
31+
});
32+
});
33+
}
34+
35+
// Check the results.
36+
for (size_t I = 0; I < 4; ++I) {
37+
if (Array[I] != I) {
38+
std::cout << "The result is incorrect for element: " << I
39+
<< " , expected: " << I << " , got: " << Array[I] << std::endl;
40+
MismatchFound = true;
41+
}
42+
}
43+
44+
return MismatchFound;
45+
}
46+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
47+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
48+
#endif

SYCL/XPTI/buffer/in_cycle.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// REQUIRES: xptifw, opencl
2+
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
5+
6+
// It looks like order of events diffres on Windows
7+
#ifdef XPTI_COLLECTOR
8+
9+
#include "../Inputs/buffer_info_collector.cpp"
10+
11+
#else
12+
13+
#include <sycl/sycl.hpp>
14+
bool func(sycl::queue &Queue, int depth = 0) {
15+
bool MismatchFound = false;
16+
// Create a buffer of 4 ints to be used inside the kernel code.
17+
sycl::buffer<int, 1> Buffer(4);
18+
19+
// Size of index space for kernel.
20+
sycl::range<1> NumOfWorkItems{Buffer.size()};
21+
22+
// Submit command group(work) to queue.
23+
Queue.submit([&](sycl::handler &cgh) {
24+
// Get write only access to the buffer on a device.
25+
auto Accessor = Buffer.get_access<sycl::access::mode::write>(cgh);
26+
// Execute kernel.
27+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
28+
Accessor[WIid] = static_cast<int>(WIid.get(0));
29+
});
30+
});
31+
32+
// Get read only access to the buffer on the host.
33+
// This introduces an implicit barrier which blocks execution until the
34+
// command group above completes.
35+
const auto HostAccessor = Buffer.get_access<sycl::access::mode::read>();
36+
37+
// Check the results.
38+
for (size_t I = 0; I < Buffer.size(); ++I) {
39+
if (HostAccessor[I] != I) {
40+
std::cout << "The result is incorrect for element: " << I
41+
<< " , expected: " << I << " , got: " << HostAccessor[I]
42+
<< std::endl;
43+
MismatchFound = true;
44+
}
45+
}
46+
47+
if (depth > 0)
48+
MismatchFound &= func(Queue, depth - 1);
49+
return MismatchFound;
50+
}
51+
int main() {
52+
bool MismatchFound = false;
53+
// Create a SYCL queue.
54+
sycl::queue Queue{};
55+
56+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
57+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
58+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
59+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
60+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID2:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
61+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID2]]|[[#BEID2:]]
62+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID2]]|[[#BEID2:]]
63+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID2]]
64+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID3:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
65+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID3]]|[[#BEID3:]]
66+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID3]]|[[#BEID3:]]
67+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID3]]
68+
for (int i = 0; i < 3; i++)
69+
MismatchFound &= func(Queue);
70+
return MismatchFound;
71+
}
72+
73+
#endif

SYCL/XPTI/buffer/multiple_buffers.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// REQUIRES: xptifw, opencl
2+
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
5+
6+
#ifdef XPTI_COLLECTOR
7+
8+
#include "../Inputs/buffer_info_collector.cpp"
9+
10+
#else
11+
12+
#include <sycl/sycl.hpp>
13+
14+
int main() {
15+
bool MismatchFound = false;
16+
sycl::queue Queue{};
17+
18+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}multiple_buffers.cpp:19:24|{{.*}}multiple_buffers.cpp:19:24
19+
sycl::buffer<int, 1> Buffer1(4);
20+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID2:]]|{{.*}}multiple_buffers.cpp:21:24|{{.*}}multiple_buffers.cpp:21:24
21+
sycl::buffer<int, 1> Buffer2(4);
22+
23+
sycl::range<1> NumOfWorkItems{Buffer1.size()};
24+
25+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
26+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID2]]|[[#BEID2:]]
27+
Queue.submit([&](sycl::handler &cgh) {
28+
// Get write only access to the buffer on a device.
29+
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
30+
auto Accessor2 = Buffer2.get_access<sycl::access::mode::write>(cgh);
31+
// Execute kernel.
32+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
33+
Accessor1[WIid] = static_cast<int>(WIid.get(0));
34+
Accessor2[WIid] = static_cast<int>(WIid.get(0));
35+
});
36+
});
37+
38+
const auto HostAccessor1 = Buffer1.get_access<sycl::access::mode::read>();
39+
const auto HostAccessor2 = Buffer2.get_access<sycl::access::mode::read>();
40+
41+
// Check the results.
42+
for (size_t I = 0; I < Buffer1.size(); ++I) {
43+
if (HostAccessor1[I] != I || HostAccessor2[I] != I) {
44+
std::cout << "The result is incorrect for element: " << I
45+
<< " , expected: " << I << " , got: " << HostAccessor1[I]
46+
<< ", " << HostAccessor2[I] << std::endl;
47+
MismatchFound = true;
48+
}
49+
}
50+
51+
return MismatchFound;
52+
}
53+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID2]]|[[#BEID2:]]
54+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID2]]
55+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
56+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
57+
#endif

SYCL/XPTI/buffer/multiple_queues.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// REQUIRES: xptifw, opencl, (cpu || acc)
2+
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
5+
6+
#ifdef XPTI_COLLECTOR
7+
8+
#include "../Inputs/buffer_info_collector.cpp"
9+
10+
#else
11+
12+
#include <sycl/sycl.hpp>
13+
14+
int main() {
15+
bool MismatchFound = false;
16+
17+
sycl::device Device{sycl::ext::oneapi::filter_selector{"cpu,accelerator"}};
18+
auto Devices = Device.create_sub_devices<
19+
sycl::info::partition_property::partition_equally>(2);
20+
21+
int Array[4] = {0};
22+
{
23+
sycl::queue Queue1{Devices[0]};
24+
sycl::queue Queue2{Devices[1]};
25+
sycl::range<1> NumOfWorkItems{4};
26+
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}multiple_queues.cpp:27:26|{{.*}}multiple_queues.cpp:27:26
27+
sycl::buffer<int, 1> Buffer1(Array, NumOfWorkItems);
28+
29+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
30+
Queue1.submit([&](sycl::handler &cgh) {
31+
// Get write only access to the buffer on a device.
32+
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
33+
// Execute kernel.
34+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
35+
Accessor1[WIid] = static_cast<int>(WIid.get(0));
36+
});
37+
});
38+
Queue1.wait();
39+
40+
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID2:]]
41+
Queue2.submit([&](sycl::handler &cgh) {
42+
// Get write only access to the buffer on a device.
43+
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
44+
// Execute kernel.
45+
cgh.parallel_for<class MulBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
46+
Accessor1[WIid] *= static_cast<int>(WIid.get(0));
47+
});
48+
});
49+
}
50+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
51+
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID2:]]
52+
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
53+
54+
// Check the results.
55+
for (size_t I = 0; I < 4; ++I) {
56+
if (Array[I] != I * I) {
57+
std::cout << "The result is incorrect for element: " << I
58+
<< " , expected: " << I * I << " , got: " << Array[I]
59+
<< std::endl;
60+
MismatchFound = true;
61+
}
62+
}
63+
64+
return MismatchFound;
65+
}
66+
#endif

0 commit comments

Comments
 (0)