Skip to content

Commit 4c52506

Browse files
[SYCL][Tools] sycl-trace: extract backend specific code to dynamically loaded libraries (#9119)
Not all systems could have cuda driver installed. Previous implementation with statically linked cuda blocks sycl-trace from being run on systems w/o cuda. Move L0 specific and CUDA specific collectors to dynamic libraries loaded by request. NOTE: sycl-trace originally is not adapted to any other OS but Linux. So no general classes and other unified stuff is implemented. --------- Signed-off-by: Tikhomirova, Kseniya <[email protected]>
1 parent c406de0 commit 4c52506

File tree

6 files changed

+226
-85
lines changed

6 files changed

+226
-85
lines changed

sycl/tools/sycl-trace/CMakeLists.txt

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,30 @@ link_llvm_libs(sycl-trace
1111
)
1212

1313
if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS)
14-
set(EXTRA_SRC
14+
add_library(ze_trace_collector SHARED
1515
ze_trace_collector.cpp
1616
)
17+
set(EXTRA_TARGETS_TO_INSTALL
18+
ze_trace_collector
19+
)
20+
add_dependencies(sycl-trace ze_trace_collector)
1721
endif()
1822

1923
if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
20-
set(EXTRA_SRC
21-
${EXTRA_SRC}
24+
add_library(cuda_trace_collector SHARED
2225
cuda_trace_collector.cpp
2326
)
27+
set(EXTRA_TARGETS_TO_INSTALL
28+
${EXTRA_TARGETS_TO_INSTALL}
29+
cuda_trace_collector
30+
)
31+
add_dependencies(sycl-trace cuda_trace_collector)
2432
endif()
2533

2634
add_library(sycl_pi_trace_collector SHARED
2735
collector.cpp
2836
pi_trace_collector.cpp
2937
sycl_trace_collector.cpp
30-
${EXTRA_SRC}
3138
)
3239

3340
find_package(Python3 REQUIRED)
@@ -37,6 +44,7 @@ add_custom_target(pi-pretty-printers
3744
${CMAKE_CURRENT_BINARY_DIR}/pi_printers.def
3845
${CMAKE_CURRENT_BINARY_DIR}/pi_structs.hpp
3946
)
47+
4048
add_custom_command(
4149
OUTPUT
4250
${CMAKE_CURRENT_BINARY_DIR}/pi_printers.def
@@ -49,10 +57,11 @@ add_custom_command(
4957

5058
# To get L0 loader
5159
if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS)
52-
add_dependencies(sycl_pi_trace_collector pi_level_zero)
60+
add_dependencies(ze_trace_collector pi_level_zero)
5361

54-
target_link_libraries(sycl_pi_trace_collector PRIVATE LevelZeroLoader-Headers)
55-
target_compile_definitions(sycl_pi_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO)
62+
target_link_libraries(ze_trace_collector PRIVATE LevelZeroLoader-Headers)
63+
target_compile_definitions(ze_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO)
64+
target_link_libraries(ze_trace_collector PRIVATE xptifw)
5665

5766
add_custom_target(ze-pretty-printers
5867
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ze_printers.def
@@ -68,7 +77,8 @@ if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS)
6877
${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h
6978
)
7079

71-
add_dependencies(sycl_pi_trace_collector ze-pretty-printers)
80+
add_dependencies(ze_trace_collector ze-pretty-printers)
81+
target_compile_definitions(sycl_pi_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO)
7282
endif()
7383

7484
target_compile_definitions(sycl_pi_trace_collector PRIVATE XPTI_CALLBACK_API_EXPORTS)
@@ -90,7 +100,7 @@ if(SYCL_BUILD_PI_CUDA)
90100

91101
find_package(CUDA 10.1 REQUIRED)
92102

93-
target_compile_definitions(sycl_pi_trace_collector
103+
target_compile_definitions(cuda_trace_collector
94104
PRIVATE
95105
$<$<BOOL:${SYCL_BUILD_PI_CUDA}>:USE_PI_CUDA>
96106
)
@@ -106,12 +116,12 @@ if(SYCL_BUILD_PI_CUDA)
106116
find_cuda_cupti_library()
107117
endif()
108118

109-
target_include_directories(sycl_pi_trace_collector
119+
target_include_directories(cuda_trace_collector
110120
PRIVATE
111121
${CUDA_CUPTI_INCLUDE_DIR}
112122
)
113123

114-
target_link_libraries(sycl_pi_trace_collector
124+
target_link_libraries(cuda_trace_collector
115125
PRIVATE
116126
cudadrv
117127
${CUDA_cupti_LIBRARY}
@@ -142,15 +152,19 @@ if(SYCL_BUILD_PI_CUDA)
142152
add_custom_target(cuda-pretty-printers)
143153
endif()
144154

145-
add_dependencies(sycl_pi_trace_collector cuda-pretty-printers)
146-
155+
add_dependencies(cuda_trace_collector cuda-pretty-printers)
156+
target_link_libraries(cuda_trace_collector PRIVATE xptifw)
157+
target_compile_definitions(sycl_pi_trace_collector
158+
PRIVATE
159+
$<$<BOOL:${SYCL_BUILD_PI_CUDA}>:USE_PI_CUDA>
160+
)
147161
endif()
148162

149163
add_dependencies(sycl-trace sycl_pi_trace_collector)
150164
add_dependencies(sycl-toolchain sycl-trace)
151165

152166
include(GNUInstallDirs)
153-
install(TARGETS sycl-trace sycl_pi_trace_collector
167+
install(TARGETS sycl-trace sycl_pi_trace_collector ${EXTRA_TARGETS_TO_INSTALL}
154168
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT sycl-trace
155169
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-trace
156170
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-trace

sycl/tools/sycl-trace/collector.cpp

Lines changed: 163 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,149 @@
88

99
#include "xpti/xpti_trace_framework.h"
1010

11+
#include <dlfcn.h>
12+
#include <iostream>
13+
#include <mutex>
14+
#include <string>
1115
#include <sycl/detail/spinlock.hpp>
1216

1317
sycl::detail::SpinLock GlobalLock;
1418

1519
bool HasZEPrinter = false;
16-
bool HasCUPrinter = false;
17-
bool HasPIPrinter = false;
18-
bool HasSYCLPrinter = false;
1920

20-
void zePrintersInit();
21-
void zePrintersFinish();
21+
std::string getCurrentDSODir() {
22+
auto CurrentFunc = reinterpret_cast<const void *>(&getCurrentDSODir);
23+
Dl_info Info;
24+
int RetCode = dladdr(CurrentFunc, &Info);
25+
if (0 == RetCode) {
26+
// This actually indicates an error
27+
return "";
28+
}
29+
30+
auto Path = std::string(Info.dli_fname);
31+
auto LastSlashPos = Path.find_last_of('/');
32+
33+
return Path.substr(0, LastSlashPos);
34+
}
35+
36+
class CollectorLibraryWrapper {
37+
typedef void (*InitFuncType)();
38+
typedef void (*FinishFuncType)();
39+
typedef void (*CallbackFuncType)(uint16_t, xpti::trace_event_data_t *,
40+
xpti::trace_event_data_t *, uint64_t,
41+
const void *);
42+
typedef void (*SetIndentLvlFuncType)(int);
43+
44+
public:
45+
CollectorLibraryWrapper(const std::string &LibraryName)
46+
: MLibraryName(LibraryName){};
47+
~CollectorLibraryWrapper() { clear(); };
48+
49+
const std::string InitFuncName = "init";
50+
const std::string FinishFuncName = "finish";
51+
const std::string CallbackFuncName = "callback";
52+
const std::string IndentFuncName = "setIndentationLevel";
53+
54+
bool initPrinters() {
55+
std::string Path = getCurrentDSODir();
56+
if (Path.empty())
57+
return false;
58+
Path += "/" + MLibraryName;
59+
MHandle = dlopen(Path.c_str(), RTLD_LAZY);
60+
if (!MHandle) {
61+
std::cerr << "Cannot load library: " << dlerror() << '\n';
62+
return false;
63+
}
64+
auto ExportSymbol = [&](void *&FuncPtr, const std::string &FuncName) {
65+
FuncPtr = dlsym(MHandle, FuncName.c_str());
66+
if (!FuncPtr) {
67+
std::cerr << "Cannot export symbol: " << dlerror() << '\n';
68+
return false;
69+
}
70+
return true;
71+
};
72+
if (!ExportSymbol(MInitPtr, InitFuncName) ||
73+
!ExportSymbol(MFinishPtr, FinishFuncName) ||
74+
!ExportSymbol(MSetIndentationLevelPtr, IndentFuncName) ||
75+
!ExportSymbol(MCallbackPtr, CallbackFuncName)) {
76+
clear();
77+
return false;
78+
}
79+
80+
if (MIndentationLevel)
81+
((SetIndentLvlFuncType)MSetIndentationLevelPtr)(MIndentationLevel);
82+
83+
((InitFuncType)MInitPtr)();
84+
85+
return true;
86+
}
87+
88+
void finishPrinters() {
89+
if (MHandle)
90+
((FinishFuncType)MFinishPtr)();
91+
}
92+
93+
void setIndentationLevel(int Level) {
94+
MIndentationLevel = Level;
95+
if (MHandle)
96+
((SetIndentLvlFuncType)MSetIndentationLevelPtr)(MIndentationLevel);
97+
}
98+
99+
void callback(uint16_t TraceType, xpti::trace_event_data_t *Parent,
100+
xpti::trace_event_data_t *Event, uint64_t Instance,
101+
const void *UserData) {
102+
// Not expected to be called when MHandle == NULL since we should not be
103+
// subscribed if init failed. Although still do the check for sure.
104+
if (MHandle)
105+
((CallbackFuncType)MCallbackPtr)(TraceType, Parent, Event, Instance,
106+
UserData);
107+
}
108+
109+
void clear() {
110+
MInitPtr = nullptr;
111+
MFinishPtr = nullptr;
112+
MCallbackPtr = nullptr;
113+
MSetIndentationLevelPtr = nullptr;
114+
115+
if (MHandle)
116+
dlclose(MHandle);
117+
MHandle = nullptr;
118+
}
119+
120+
private:
121+
std::string MLibraryName;
122+
int MIndentationLevel = 0;
123+
124+
void *MHandle = nullptr;
125+
126+
void *MInitPtr = nullptr;
127+
void *MFinishPtr = nullptr;
128+
void *MCallbackPtr = nullptr;
129+
void *MSetIndentationLevelPtr = nullptr;
130+
} zeCollectorLibrary("libze_trace_collector.so"),
131+
cudaCollectorLibrary("libcuda_trace_collector.so");
132+
133+
// These routing functions are needed to be able to use GlobalLock for
134+
// dynamically loaded collectors.
135+
XPTI_CALLBACK_API void zeCallback(uint16_t TraceType,
136+
xpti::trace_event_data_t *Parent,
137+
xpti::trace_event_data_t *Event,
138+
uint64_t Instance, const void *UserData) {
139+
std::lock_guard<sycl::detail::SpinLock> _{GlobalLock};
140+
return zeCollectorLibrary.callback(TraceType, Parent, Event, Instance,
141+
UserData);
142+
}
22143
#ifdef USE_PI_CUDA
23-
void cuPrintersInit();
24-
void cuPrintersFinish();
144+
XPTI_CALLBACK_API void cudaCallback(uint16_t TraceType,
145+
xpti::trace_event_data_t *Parent,
146+
xpti::trace_event_data_t *Event,
147+
uint64_t Instance, const void *UserData) {
148+
std::lock_guard<sycl::detail::SpinLock> _{GlobalLock};
149+
return cudaCollectorLibrary.callback(TraceType, Parent, Event, Instance,
150+
UserData);
151+
}
25152
#endif
153+
26154
void piPrintersInit();
27155
void piPrintersFinish();
28156
void syclPrintersInit();
@@ -32,20 +160,11 @@ XPTI_CALLBACK_API void piCallback(uint16_t TraceType,
32160
xpti::trace_event_data_t *Parent,
33161
xpti::trace_event_data_t *Event,
34162
uint64_t Instance, const void *UserData);
35-
XPTI_CALLBACK_API void zeCallback(uint16_t TraceType,
36-
xpti::trace_event_data_t *Parent,
37-
xpti::trace_event_data_t *Event,
38-
uint64_t Instance, const void *UserData);
39-
#ifdef USE_PI_CUDA
40-
XPTI_CALLBACK_API void cuCallback(uint16_t TraceType,
41-
xpti::trace_event_data_t *Parent,
42-
xpti::trace_event_data_t *Event,
43-
uint64_t Instance, const void *UserData);
44-
#endif
45163
XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
46164
xpti::trace_event_data_t *Parent,
47165
xpti::trace_event_data_t *Event,
48166
uint64_t Instance, const void *UserData);
167+
49168
XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
50169
unsigned int /*minor_version*/,
51170
const char * /*version_str*/,
@@ -58,30 +177,34 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
58177
piCallback);
59178
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
60179
piCallback);
180+
zeCollectorLibrary.setIndentationLevel(1);
181+
cudaCollectorLibrary.setIndentationLevel(1);
61182
#ifdef SYCL_HAS_LEVEL_ZERO
62183
} else if (std::string_view(StreamName) ==
63184
"sycl.experimental.level_zero.debug" &&
64185
std::getenv("SYCL_TRACE_ZE_ENABLE")) {
65-
zePrintersInit();
66-
uint16_t StreamID = xptiRegisterStream(StreamName);
67-
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin,
68-
zeCallback);
69-
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
70-
zeCallback);
186+
if (zeCollectorLibrary.initPrinters()) {
187+
HasZEPrinter = true;
188+
uint16_t StreamID = xptiRegisterStream(StreamName);
189+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin,
190+
zeCallback);
191+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
192+
zeCallback);
193+
}
71194
#endif
72195
#ifdef USE_PI_CUDA
73196
} else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" &&
74197
std::getenv("SYCL_TRACE_CU_ENABLE")) {
75-
cuPrintersInit();
76-
uint16_t StreamID = xptiRegisterStream(StreamName);
77-
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin,
78-
cuCallback);
79-
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
80-
cuCallback);
198+
if (cudaCollectorLibrary.initPrinters()) {
199+
uint16_t StreamID = xptiRegisterStream(StreamName);
200+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin,
201+
cudaCallback);
202+
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
203+
cudaCallback);
204+
}
81205
#endif
82-
}
83-
if (std::string_view(StreamName) == "sycl" &&
84-
std::getenv("SYCL_TRACE_API_ENABLE")) {
206+
} else if (std::string_view(StreamName) == "sycl" &&
207+
std::getenv("SYCL_TRACE_API_ENABLE")) {
85208
syclPrintersInit();
86209
uint16_t StreamID = xptiRegisterStream(StreamName);
87210
xptiRegisterCallback(StreamID, xpti::trace_diagnostics, syclCallback);
@@ -95,13 +218,17 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) {
95218
#ifdef SYCL_HAS_LEVEL_ZERO
96219
else if (std::string_view(StreamName) ==
97220
"sycl.experimental.level_zero.debug" &&
98-
std::getenv("SYCL_TRACE_ZE_ENABLE"))
99-
zePrintersFinish();
221+
std::getenv("SYCL_TRACE_ZE_ENABLE")) {
222+
zeCollectorLibrary.finishPrinters();
223+
zeCollectorLibrary.clear();
224+
}
100225
#endif
101226
#ifdef USE_PI_CUDA
102227
else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" &&
103-
std::getenv("SYCL_TRACE_CU_ENABLE"))
104-
cuPrintersFinish();
228+
std::getenv("SYCL_TRACE_CU_ENABLE")) {
229+
cudaCollectorLibrary.finishPrinters();
230+
cudaCollectorLibrary.clear();
231+
}
105232
#endif
106233
if (std::string_view(StreamName) == "sycl" &&
107234
std::getenv("SYCL_TRACE_API_ENABLE"))

0 commit comments

Comments
 (0)