Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][XPTI] Buffer events #681

Merged
merged 15 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions SYCL/XPTI/Inputs/buffer_info_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ XPTI_CALLBACK_API void memCallback(uint16_t, xpti::trace_event_data_t *,
xpti::trace_event_data_t *, uint64_t,
const void *);

XPTI_CALLBACK_API void syclBufferCallback(uint16_t, xpti::trace_event_data_t *,
xpti::trace_event_data_t *, uint64_t,
const void *);

XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
unsigned int MinorVersion,
const char *VersionStr,
Expand All @@ -36,6 +40,26 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end),
memCallback);
}

if (NameView == "sycl.experimental.buffer") {
uint8_t StreamID = xptiRegisterStream(StreamName);
xptiRegisterCallback(StreamID,
static_cast<uint16_t>(
xpti::trace_point_type_t::offload_alloc_construct),
syclBufferCallback);
xptiRegisterCallback(StreamID,
static_cast<uint16_t>(
xpti::trace_point_type_t::offload_alloc_associate),
syclBufferCallback);
xptiRegisterCallback(
StreamID,
static_cast<uint16_t>(xpti::trace_point_type_t::offload_alloc_release),
syclBufferCallback);
xptiRegisterCallback(
StreamID,
static_cast<uint16_t>(xpti::trace_point_type_t::offload_alloc_destruct),
syclBufferCallback);
}
}

XPTI_CALLBACK_API void xptiTraceFinish(const char *streamName) {
Expand All @@ -62,3 +86,43 @@ XPTI_CALLBACK_API void memCallback(uint16_t TraceType,
std::cout << " alloc_pointer : " << Data->alloc_pointer << "\n";
std::cout << " alloc_size : " << Data->alloc_size << "\n";
}

XPTI_CALLBACK_API void syclBufferCallback(uint16_t TraceType,
xpti::trace_event_data_t *Parent,
xpti::trace_event_data_t *Event,
uint64_t IId, const void *UserData) {
std::lock_guard Lock{GMutex};
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
switch (Type) {
case xpti::trace_point_type_t::offload_alloc_construct: {
auto BufConstr = (xpti::offload_buffer_data_t *)UserData;
std::cout << IId << "|Create buffer|" << BufConstr->user_object_handle
<< "|" << Event->reserved.payload->name << "|"
<< Event->reserved.payload->source_file << ":"
<< Event->reserved.payload->line_no << ":"
<< Event->reserved.payload->column_no << "\n";

break;
}
case xpti::trace_point_type_t::offload_alloc_associate: {
auto BufAssoc = (xpti::offload_buffer_association_data_t *)UserData;
std::cout << IId << "|Associate buffer|" << BufAssoc->user_object_handle
<< "|" << BufAssoc->mem_object_handle << std::endl;
break;
}
case xpti::trace_point_type_t::offload_alloc_release: {
auto BufRelease = (xpti::offload_buffer_association_data_t *)UserData;
std::cout << IId << "|Release buffer|" << BufRelease->user_object_handle
<< "|" << BufRelease->mem_object_handle << std::endl;
break;
}
case xpti::trace_point_type_t::offload_alloc_destruct: {
auto BufDestr = (xpti::offload_buffer_data_t *)UserData;
std::cout << IId << "|Destruct buffer|" << BufDestr->user_object_handle
<< "\n";
break;
}
default:
std::cout << "Unknown tracepoint\n";
}
}
48 changes: 48 additions & 0 deletions SYCL/XPTI/buffer/host_array.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// REQUIRES: xptifw, opencl
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1

#ifdef XPTI_COLLECTOR

#include "../Inputs/buffer_info_collector.cpp"

#else

#include <sycl/sycl.hpp>

int main() {
bool MismatchFound = false;
sycl::queue Queue{};

int Array[4];
{
sycl::range<1> NumOfWorkItems{4};
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}host_array.cpp:22:26|{{.*}}host_array.cpp:22:26
sycl::buffer<int, 1> Buffer1(Array, NumOfWorkItems);

// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
Queue.submit([&](sycl::handler &cgh) {
// Get write only access to the buffer on a device.
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
// Execute kernel.
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
Accessor1[WIid] = static_cast<int>(WIid.get(0));
});
});
}

// Check the results.
for (size_t I = 0; I < 4; ++I) {
if (Array[I] != I) {
std::cout << "The result is incorrect for element: " << I
<< " , expected: " << I << " , got: " << Array[I] << std::endl;
MismatchFound = true;
}
}

return MismatchFound;
}
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
#endif
73 changes: 73 additions & 0 deletions SYCL/XPTI/buffer/in_cycle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// REQUIRES: xptifw, opencl
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1

// It looks like order of events diffres on Windows
#ifdef XPTI_COLLECTOR

#include "../Inputs/buffer_info_collector.cpp"

#else

#include <sycl/sycl.hpp>
bool func(sycl::queue &Queue, int depth = 0) {
bool MismatchFound = false;
// Create a buffer of 4 ints to be used inside the kernel code.
sycl::buffer<int, 1> Buffer(4);

// Size of index space for kernel.
sycl::range<1> NumOfWorkItems{Buffer.size()};

// Submit command group(work) to queue.
Queue.submit([&](sycl::handler &cgh) {
// Get write only access to the buffer on a device.
auto Accessor = Buffer.get_access<sycl::access::mode::write>(cgh);
// Execute kernel.
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
Accessor[WIid] = static_cast<int>(WIid.get(0));
});
});

// Get read only access to the buffer on the host.
// This introduces an implicit barrier which blocks execution until the
// command group above completes.
const auto HostAccessor = Buffer.get_access<sycl::access::mode::read>();

// Check the results.
for (size_t I = 0; I < Buffer.size(); ++I) {
if (HostAccessor[I] != I) {
std::cout << "The result is incorrect for element: " << I
<< " , expected: " << I << " , got: " << HostAccessor[I]
<< std::endl;
MismatchFound = true;
}
}

if (depth > 0)
MismatchFound &= func(Queue, depth - 1);
return MismatchFound;
}
int main() {
bool MismatchFound = false;
// Create a SYCL queue.
sycl::queue Queue{};

// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID2:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID2]]|[[#BEID2:]]
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID2]]|[[#BEID2:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID2]]
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID3:]]|{{.*}}in_cycle.cpp:17:24|{{.*}}in_cycle.cpp:17:24
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID3]]|[[#BEID3:]]
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID3]]|[[#BEID3:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID3]]
for (int i = 0; i < 3; i++)
MismatchFound &= func(Queue);
return MismatchFound;
}

#endif
57 changes: 57 additions & 0 deletions SYCL/XPTI/buffer/multiple_buffers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// REQUIRES: xptifw, opencl
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1

#ifdef XPTI_COLLECTOR

#include "../Inputs/buffer_info_collector.cpp"

#else

#include <sycl/sycl.hpp>

int main() {
bool MismatchFound = false;
sycl::queue Queue{};

// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}multiple_buffers.cpp:19:24|{{.*}}multiple_buffers.cpp:19:24
sycl::buffer<int, 1> Buffer1(4);
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID2:]]|{{.*}}multiple_buffers.cpp:21:24|{{.*}}multiple_buffers.cpp:21:24
sycl::buffer<int, 1> Buffer2(4);

sycl::range<1> NumOfWorkItems{Buffer1.size()};

// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID2]]|[[#BEID2:]]
Queue.submit([&](sycl::handler &cgh) {
// Get write only access to the buffer on a device.
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
auto Accessor2 = Buffer2.get_access<sycl::access::mode::write>(cgh);
// Execute kernel.
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
Accessor1[WIid] = static_cast<int>(WIid.get(0));
Accessor2[WIid] = static_cast<int>(WIid.get(0));
});
});

const auto HostAccessor1 = Buffer1.get_access<sycl::access::mode::read>();
const auto HostAccessor2 = Buffer2.get_access<sycl::access::mode::read>();

// Check the results.
for (size_t I = 0; I < Buffer1.size(); ++I) {
if (HostAccessor1[I] != I || HostAccessor2[I] != I) {
std::cout << "The result is incorrect for element: " << I
<< " , expected: " << I << " , got: " << HostAccessor1[I]
<< ", " << HostAccessor2[I] << std::endl;
MismatchFound = true;
}
}

return MismatchFound;
}
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID2]]|[[#BEID2:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID2]]
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]
#endif
66 changes: 66 additions & 0 deletions SYCL/XPTI/buffer/multiple_queues.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// REQUIRES: xptifw, opencl, (cpu || acc)
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1

#ifdef XPTI_COLLECTOR

#include "../Inputs/buffer_info_collector.cpp"

#else

#include <sycl/sycl.hpp>

int main() {
bool MismatchFound = false;

sycl::device Device{sycl::ext::oneapi::filter_selector{"cpu,accelerator"}};
auto Devices = Device.create_sub_devices<
sycl::info::partition_property::partition_equally>(2);

int Array[4] = {0};
{
sycl::queue Queue1{Devices[0]};
sycl::queue Queue2{Devices[1]};
sycl::range<1> NumOfWorkItems{4};
// CHECK:{{[0-9]+}}|Create buffer|[[#USERID1:]]|{{.*}}multiple_queues.cpp:27:26|{{.*}}multiple_queues.cpp:27:26
sycl::buffer<int, 1> Buffer1(Array, NumOfWorkItems);

// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID1:]]
Queue1.submit([&](sycl::handler &cgh) {
// Get write only access to the buffer on a device.
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
// Execute kernel.
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
Accessor1[WIid] = static_cast<int>(WIid.get(0));
});
});
Queue1.wait();

// CHECK:{{[0-9]+}}|Associate buffer|[[#USERID1]]|[[#BEID2:]]
Queue2.submit([&](sycl::handler &cgh) {
// Get write only access to the buffer on a device.
auto Accessor1 = Buffer1.get_access<sycl::access::mode::write>(cgh);
// Execute kernel.
cgh.parallel_for<class MulBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
Accessor1[WIid] *= static_cast<int>(WIid.get(0));
});
});
}
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID1:]]
// CHECK:{{[0-9]+}}|Release buffer|[[#USERID1]]|[[#BEID2:]]
// CHECK:{{[0-9]+}}|Destruct buffer|[[#USERID1]]

// Check the results.
for (size_t I = 0; I < 4; ++I) {
if (Array[I] != I * I) {
std::cout << "The result is incorrect for element: " << I
<< " , expected: " << I * I << " , got: " << Array[I]
<< std::endl;
MismatchFound = true;
}
}

return MismatchFound;
}
#endif
Loading