Skip to content

Commit 0cd0414

Browse files
[SYCL][CUDA] Add support of CUDA XPTI tracing (#6373)
Fork of #5797 This patch rebases and finalizes (similarly to #6023) the draft in #5797, which already contained the most important commits thanks to @alexbatashev. The most relevant additions of this patch were done in CMake files, in particular - switch from `FindCUDA` CMake (deprecated) module to the `FindCUDAToolkit` one in order to find cupti library by means of `CUDA_cupti_LIBRARY`. This is advisable because on some systems `FindCUDA` fails to find `CUDA_cupti_LIBRARY`. This is also the case of the CI, see the [log](https://github.com/intel/llvm/runs/7115612243?check_suite_focus=true) in case of `FindCUDA` is used. - find `generated_cuda_meta.h` for generating the CUDA printer definitions, since the location of this header file seems to vary depending on the system, in case of this file is not found a warning is printed and no errors are thrown. Co-authored-by: Alexander Batashev <[email protected]>
1 parent 9bef890 commit 0cd0414

File tree

11 files changed

+456
-4
lines changed

11 files changed

+456
-4
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
macro(find_cuda_cupti_library)
2+
# The following if can be removed when FindCUDA -> FindCUDAToolkit
3+
find_library(CUDA_cupti_LIBRARY
4+
NAMES cupti
5+
HINTS ${CUDA_TOOLKIT_ROOT_DIR}
6+
ENV CUDA_PATH
7+
PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
8+
../extras/CUPTI/lib64/
9+
../extras/CUPTI/lib/
10+
)
11+
endmacro()
12+
13+
macro(find_cuda_cupti_include_dir)
14+
find_path(CUDA_CUPTI_INCLUDE_DIR cupti.h PATHS
15+
"${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include"
16+
"${CUDA_INCLUDE_DIRS}/../extras/CUPTI/include"
17+
"${CUDA_INCLUDE_DIRS}"
18+
NO_DEFAULT_PATH)
19+
endmacro()
20+

sycl/doc/design/SYCLInstrumentationUsingXPTI.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,23 @@ arguments made by SYCL application.
320320
| :------------------------: | :-------------------- | :------- |
321321
| `function_with_args_begin` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_with_args_begin` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi.debug` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. </li> <li> **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, and arguments. </li></div> | None |
322322
| `function_with_args_end` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_with_args_end` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi.debug` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_with_args_begin` </li> <li> **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value. </li></div> | None |
323+
324+
## SYCL Stream `"sycl.experimental.cuda.call"` Notification Signatures
325+
326+
This stream transfers events about CUDA Driver API calls made by SYCL
327+
application.
328+
329+
| Trace Point Type | Parameter Description | Metadata |
330+
| :--------------: | :-------------------- | :------- |
331+
| `function_begin` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_begin` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_begin` event with the `function_end` event. </li> <li> **user_data**: Name of the function being called sent in as `const char *` </li></div> | None |
332+
| `function_end` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_end` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_begin` event with the `function_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_begin` </li> <li> **user_data**: Name of the function being called sent in as `const char *` </li></div> | None |
333+
334+
## SYCL Stream `"sycl.experimental.cuda.debug"` Notification Signatures
335+
336+
This stream transfers events about CUDA Driver API calls and their function
337+
arguments made by SYCL application.
338+
339+
| Trace Point Type | Parameter Description | Metadata |
340+
| :------------------------: | :-------------------- | :------- |
341+
| `function_with_args_begin` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_with_args_begin` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi.debug` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. </li> <li> **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, and arguments. </li></div> | None |
342+
| `function_with_args_end` | <div style="text-align: left"><li>**trace_type**: `xpti::trace_point_type_t::function_with_args_end` that marks the beginning of a function</li> <li> **parent**: Event ID created for all functions in the `sycl.pi.debug` layer.</li> <li> **event**: `nullptr` - since the stream of data just captures functions being called.</li> <li> **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_with_args_begin` </li> <li> **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value. </li></div> | None |

sycl/plugins/cuda/CMakeLists.txt

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,48 @@ else()
2323
)
2424
endif()
2525

26+
if (SYCL_ENABLE_XPTI_TRACING)
27+
set(XPTI_PROXY_SRC "${CMAKE_SOURCE_DIR}/../xpti/src/xpti_proxy.cpp")
28+
endif()
29+
30+
# The following two if's can be removed when FindCUDA -> FindCUDAToolkit.
31+
# CUDA_CUPTI_INCLUDE_DIR -> CUDAToolkit_CUPTI_INCLUDE_DIR
32+
include(FindCUDACupti)
33+
if(NOT CUDA_CUPTI_INCLUDE_DIR)
34+
find_cuda_cupti_include_dir()
35+
endif()
36+
# CUDA_cupti_LIBRARY -> CUDAToolkit_cupti_LIBRARY
37+
if(NOT CUDA_cupti_LIBRARY)
38+
find_cuda_cupti_library()
39+
endif()
40+
2641
add_sycl_plugin(cuda
2742
SOURCES
2843
"${sycl_inc_dir}/sycl/detail/pi.h"
2944
"${sycl_inc_dir}/sycl/detail/pi.hpp"
3045
"pi_cuda.hpp"
3146
"pi_cuda.cpp"
47+
"tracing.cpp"
48+
${XPTI_PROXY_SRC}
3249
LIBRARIES
3350
cudadrv
51+
${CUDA_cupti_LIBRARY}
52+
)
53+
54+
if (SYCL_ENABLE_XPTI_TRACING)
55+
target_compile_definitions(pi_cuda PRIVATE
56+
XPTI_ENABLE_INSTRUMENTATION
57+
XPTI_STATIC_LIBRARY
58+
)
59+
target_include_directories(pi_cuda PRIVATE "${CMAKE_SOURCE_DIR}/../xpti/include")
60+
target_link_libraries(pi_cuda PRIVATE ${CMAKE_DL_LIBS})
61+
endif()
62+
63+
64+
target_include_directories(pi_cuda
65+
PRIVATE
66+
${sycl_inc_dir}
67+
${CUDA_CUPTI_INCLUDE_DIR}
3468
)
3569

3670
set_target_properties(pi_cuda PROPERTIES LINKER_LANGUAGE CXX)

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
#include <mutex>
2626
#include <regex>
2727

28+
// Forward declarations
29+
void enableCUDATracing();
30+
void disableCUDATracing();
31+
2832
namespace {
2933
std::string getCudaVersionString() {
3034
int driver_version = 0;
@@ -5309,7 +5313,10 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
53095313
// This API is called by Sycl RT to notify the end of the plugin lifetime.
53105314
// TODO: add a global variable lifetime management code here (see
53115315
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
5312-
pi_result cuda_piTearDown(void *) { return PI_SUCCESS; }
5316+
pi_result cuda_piTearDown(void *) {
5317+
disableCUDATracing();
5318+
return PI_SUCCESS;
5319+
}
53135320

53145321
const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING;
53155322

@@ -5328,6 +5335,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
53285335
std::memset(&(PluginInit->PiFunctionTable), 0,
53295336
sizeof(PluginInit->PiFunctionTable));
53305337

5338+
enableCUDATracing();
5339+
53315340
// Forward calls to CUDA RT.
53325341
#define _PI_CL(pi_api, cuda_api) \
53335342
(PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);

sycl/plugins/cuda/tracing.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
//===-------------- tracing.cpp - CUDA Host API Tracing --------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifdef XPTI_ENABLE_INSTRUMENTATION
10+
#include <xpti/xpti_data_types.h>
11+
#include <xpti/xpti_trace_framework.h>
12+
#endif
13+
14+
#include <cuda.h>
15+
#include <cupti.h>
16+
17+
#include <exception>
18+
#include <iostream>
19+
20+
constexpr auto CUDA_CALL_STREAM_NAME = "sycl.experimental.cuda.call";
21+
constexpr auto CUDA_DEBUG_STREAM_NAME = "sycl.experimental.cuda.debug";
22+
23+
thread_local uint64_t CallCorrelationID = 0;
24+
thread_local uint64_t DebugCorrelationID = 0;
25+
26+
#ifdef XPTI_ENABLE_INSTRUMENTATION
27+
static xpti_td *GCallEvent = nullptr;
28+
static xpti_td *GDebugEvent = nullptr;
29+
#endif // XPTI_ENABLE_INSTRUMENTATION
30+
31+
constexpr auto GVerStr = "0.1";
32+
constexpr int GMajVer = 0;
33+
constexpr int GMinVer = 1;
34+
35+
#ifdef XPTI_ENABLE_INSTRUMENTATION
36+
static void cuptiCallback(void *userdata, CUpti_CallbackDomain,
37+
CUpti_CallbackId CBID, const void *CBData) {
38+
if (xptiTraceEnabled()) {
39+
const auto *CBInfo = static_cast<const CUpti_CallbackData *>(CBData);
40+
41+
if (CBInfo->callbackSite == CUPTI_API_ENTER) {
42+
CallCorrelationID = xptiGetUniqueId();
43+
DebugCorrelationID = xptiGetUniqueId();
44+
}
45+
46+
const char *FuncName = CBInfo->functionName;
47+
uint32_t FuncID = static_cast<uint32_t>(CBID);
48+
uint16_t TraceTypeArgs = CBInfo->callbackSite == CUPTI_API_ENTER
49+
? xpti::trace_function_with_args_begin
50+
: xpti::trace_function_with_args_end;
51+
uint16_t TraceType = CBInfo->callbackSite == CUPTI_API_ENTER
52+
? xpti::trace_function_begin
53+
: xpti::trace_function_end;
54+
55+
uint8_t CallStreamID = xptiRegisterStream(CUDA_CALL_STREAM_NAME);
56+
uint8_t DebugStreamID = xptiRegisterStream(CUDA_DEBUG_STREAM_NAME);
57+
58+
xptiNotifySubscribers(CallStreamID, TraceType, GCallEvent, nullptr,
59+
CallCorrelationID, FuncName);
60+
61+
xpti::function_with_args_t Payload{
62+
FuncID, FuncName, const_cast<void *>(CBInfo->functionParams),
63+
CBInfo->functionReturnValue, CBInfo->context};
64+
xptiNotifySubscribers(DebugStreamID, TraceTypeArgs, GDebugEvent, nullptr,
65+
DebugCorrelationID, &Payload);
66+
}
67+
}
68+
#endif
69+
70+
void enableCUDATracing() {
71+
#ifdef XPTI_ENABLE_INSTRUMENTATION
72+
if (!xptiTraceEnabled())
73+
return;
74+
75+
xptiRegisterStream(CUDA_CALL_STREAM_NAME);
76+
xptiInitialize(CUDA_CALL_STREAM_NAME, GMajVer, GMinVer, GVerStr);
77+
xptiRegisterStream(CUDA_DEBUG_STREAM_NAME);
78+
xptiInitialize(CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr);
79+
80+
uint64_t Dummy;
81+
xpti::payload_t CUDAPayload("CUDA Plugin Layer");
82+
GCallEvent =
83+
xptiMakeEvent("CUDA Plugin Layer", &CUDAPayload,
84+
xpti::trace_algorithm_event, xpti_at::active, &Dummy);
85+
86+
xpti::payload_t CUDADebugPayload("CUDA Plugin Debug Layer");
87+
GDebugEvent =
88+
xptiMakeEvent("CUDA Plugin Debug Layer", &CUDADebugPayload,
89+
xpti::trace_algorithm_event, xpti_at::active, &Dummy);
90+
91+
CUpti_SubscriberHandle Subscriber;
92+
cuptiSubscribe(&Subscriber, cuptiCallback, nullptr);
93+
cuptiEnableDomain(1, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
94+
cuptiEnableCallback(0, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API,
95+
CUPTI_DRIVER_TRACE_CBID_cuGetErrorString);
96+
cuptiEnableCallback(0, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API,
97+
CUPTI_DRIVER_TRACE_CBID_cuGetErrorName);
98+
#endif
99+
}
100+
101+
void disableCUDATracing() {
102+
#ifdef XPTI_ENABLE_INSTRUMENTATION
103+
if (!xptiTraceEnabled())
104+
return;
105+
106+
xptiFinalize(CUDA_CALL_STREAM_NAME);
107+
xptiFinalize(CUDA_DEBUG_STREAM_NAME);
108+
#endif // XPTI_ENABLE_INSTRUMENTATION
109+
}

sycl/tools/sycl-prof/collector.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
9696
apiBeginEndCallback);
9797
xptiRegisterCallback(StreamID, xpti::trace_function_end,
9898
apiBeginEndCallback);
99+
} else if (NameView == "sycl.experimental.cuda.call") {
100+
uint8_t StreamID = xptiRegisterStream(StreamName);
101+
xptiRegisterCallback(StreamID, xpti::trace_function_begin,
102+
apiBeginEndCallback);
103+
xptiRegisterCallback(StreamID, xpti::trace_function_end,
104+
apiBeginEndCallback);
99105
}
100106
}
101107

sycl/tools/sycl-trace/CMakeLists.txt

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ add_library(sycl_pi_trace_collector SHARED
1414
collector.cpp
1515
pi_trace_collector.cpp
1616
ze_trace_collector.cpp
17+
$<$<BOOL:${SYCL_BUILD_PI_CUDA}>:cuda_trace_collector.cpp>
1718
)
1819

1920
find_package(Python3 REQUIRED)
@@ -56,6 +57,64 @@ target_include_directories(sycl_pi_trace_collector PRIVATE
5657
)
5758

5859
add_dependencies(sycl_pi_trace_collector pi-pretty-printers ze-pretty-printers)
60+
61+
if(SYCL_BUILD_PI_CUDA)
62+
63+
find_package(CUDA 10.1 REQUIRED)
64+
65+
target_compile_definitions(sycl_pi_trace_collector
66+
PRIVATE
67+
$<$<BOOL:${SYCL_BUILD_PI_CUDA}>:USE_PI_CUDA>
68+
)
69+
70+
# The following two if's can be removed when FindCUDA -> FindCUDAToolkit.
71+
# CUDA_CUPTI_INCLUDE_DIR -> CUDAToolkit_CUPTI_INCLUDE_DIR
72+
include(FindCUDACupti)
73+
if(NOT CUDA_CUPTI_INCLUDE_DIR)
74+
find_cuda_cupti_include_dir()
75+
endif()
76+
# CUDA_cupti_LIBRARY -> CUDAToolkit_cupti_LIBRARY
77+
if(NOT CUDA_cupti_LIBRARY)
78+
find_cuda_cupti_library()
79+
endif()
80+
81+
target_include_directories(sycl_pi_trace_collector
82+
PRIVATE
83+
${CUDA_CUPTI_INCLUDE_DIR}
84+
)
85+
86+
target_link_libraries(sycl_pi_trace_collector
87+
PRIVATE
88+
cudadrv
89+
${CUDA_cupti_LIBRARY}
90+
)
91+
92+
find_path(GEN_CUDA_META_H_DIR generated_cuda_meta.h PATHS
93+
"${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include"
94+
"${CUDA_INCLUDE_DIRS}/../extras/CUPTI/include"
95+
"${CUDA_INCLUDE_DIRS}"
96+
NO_DEFAULT_PATH)
97+
98+
if( EXISTS "${GEN_CUDA_META_H_DIR}/generated_cuda_meta.h" )
99+
add_custom_target(cuda-pretty-printers
100+
COMMAND ${Python3_EXECUTABLE}
101+
${CMAKE_CURRENT_SOURCE_DIR}/generate_cuda_pretty_printers.py
102+
${GEN_CUDA_META_H_DIR}/generated_cuda_meta.h
103+
DEPENDS pi_cuda
104+
BYPRODUCTS
105+
${CMAKE_CURRENT_BINARY_DIR}/cuda_printers.def
106+
)
107+
else()
108+
message(WARNING "generated_cuda_meta.h not FOUND!")
109+
message(WARNING "CUDA printer definitions cannot be generated.")
110+
file(TOUCH "${CMAKE_CURRENT_BINARY_DIR}/cuda_printers.def")
111+
add_custom_target(cuda-pretty-printers)
112+
endif()
113+
114+
add_dependencies(sycl_pi_trace_collector cuda-pretty-printers)
115+
116+
endif()
117+
59118
add_dependencies(sycl-trace sycl_pi_trace_collector)
60119
add_dependencies(sycl-toolchain sycl-trace)
61120

sycl/tools/sycl-trace/collector.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,15 @@
1313
sycl::detail::SpinLock GlobalLock;
1414

1515
bool HasZEPrinter = false;
16+
bool HasCUPrinter = false;
1617
bool HasPIPrinter = false;
1718

1819
void zePrintersInit();
1920
void zePrintersFinish();
21+
#ifdef USE_PI_CUDA
22+
void cuPrintersInit();
23+
void cuPrintersFinish();
24+
#endif
2025
void piPrintersInit();
2126
void piPrintersFinish();
2227

@@ -28,7 +33,12 @@ XPTI_CALLBACK_API void zeCallback(uint16_t TraceType,
2833
xpti::trace_event_data_t *Parent,
2934
xpti::trace_event_data_t *Event,
3035
uint64_t Instance, const void *UserData);
31-
36+
#ifdef USE_PI_CUDA
37+
XPTI_CALLBACK_API void cuCallback(uint16_t TraceType,
38+
xpti::trace_event_data_t *Parent,
39+
xpti::trace_event_data_t *Event,
40+
uint64_t Instance, const void *UserData);
41+
#endif
3242
XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
3343
unsigned int /*minor_version*/,
3444
const char * /*version_str*/,
@@ -50,6 +60,16 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
5060
zeCallback);
5161
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
5262
zeCallback);
63+
#ifdef USE_PI_CUDA
64+
} else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" &&
65+
std::getenv("SYCL_TRACE_CU_ENABLE")) {
66+
cuPrintersInit();
67+
uint16_t StreamID = xptiRegisterStream(StreamName);
68+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin,
69+
cuCallback);
70+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
71+
cuCallback);
72+
#endif
5373
}
5474
}
5575

@@ -61,4 +81,9 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) {
6181
"sycl.experimental.level_zero.debug" &&
6282
std::getenv("SYCL_TRACE_ZE_ENABLE"))
6383
zePrintersFinish();
84+
#ifdef USE_PI_CUDA
85+
else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" &&
86+
std::getenv("SYCL_TRACE_CU_ENABLE"))
87+
cuPrintersFinish();
88+
#endif
6489
}

0 commit comments

Comments
 (0)