Skip to content

Commit f7bf29b

Browse files
authored
[SYCL][XPTI] Performance improvements to XPTI streams (#11651)
Current instrumentation in SYCL runtime has additional overheads due to argument capture even it is not subscribed to or needed. This patch addresses this issue. Signed-off-by: Vasanth Tovinkere <[email protected]>
1 parent 7d9bda9 commit f7bf29b

File tree

2 files changed

+82
-43
lines changed

2 files changed

+82
-43
lines changed

sycl/plugins/level_zero/tracing.cpp

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ constexpr int GMinVer = 1;
3030
#ifdef XPTI_ENABLE_INSTRUMENTATION
3131
static xpti_td *GCallEvent = nullptr;
3232
static xpti_td *GDebugEvent = nullptr;
33+
static uint8_t GCallStreamID = 0;
34+
static uint8_t GDebugStreamID = 0;
3335
#endif // XPTI_ENABLE_INSTRUMENTATION
3436

3537
enum class ZEApiKind {
@@ -43,9 +45,10 @@ void enableZeTracing() {
4345
if (!xptiTraceEnabled())
4446
return;
4547

46-
xptiRegisterStream(ZE_CALL_STREAM_NAME);
48+
// Initialize the required streams and stream ID for use
49+
GCallStreamID = xptiRegisterStream(ZE_CALL_STREAM_NAME);
4750
xptiInitialize(ZE_CALL_STREAM_NAME, GMajVer, GMinVer, GVerStr);
48-
xptiRegisterStream(ZE_DEBUG_STREAM_NAME);
51+
GDebugStreamID = xptiRegisterStream(ZE_DEBUG_STREAM_NAME);
4952
xptiInitialize(ZE_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr);
5053

5154
uint64_t Dummy;
@@ -84,39 +87,51 @@ void enableZeTracing() {
8487
#define _ZE_API(call, domain, cb, params_type) \
8588
Prologue.domain.cb = [](params_type *Params, ze_result_t, void *, void **) { \
8689
if (xptiTraceEnabled()) { \
87-
uint8_t CallStreamID = xptiRegisterStream(ZE_CALL_STREAM_NAME); \
88-
uint8_t DebugStreamID = xptiRegisterStream(ZE_DEBUG_STREAM_NAME); \
89-
CallCorrelationID = xptiGetUniqueId(); \
90-
DebugCorrelationID = xptiGetUniqueId(); \
9190
const char *FuncName = #call; \
92-
xptiNotifySubscribers( \
93-
CallStreamID, (uint16_t)xpti::trace_point_type_t::function_begin, \
94-
GCallEvent, nullptr, CallCorrelationID, FuncName); \
95-
uint32_t FuncID = static_cast<uint32_t>(ZEApiKind::call); \
96-
xpti::function_with_args_t Payload{FuncID, FuncName, Params, nullptr, \
97-
nullptr}; \
98-
xptiNotifySubscribers( \
99-
DebugStreamID, \
100-
(uint16_t)xpti::trace_point_type_t::function_with_args_begin, \
101-
GDebugEvent, nullptr, DebugCorrelationID, &Payload); \
91+
if (xptiCheckTraceEnabled( \
92+
GCallStreamID, \
93+
(uint16_t)xpti::trace_point_type_t::function_begin)) { \
94+
CallCorrelationID = xptiGetUniqueId(); \
95+
xptiNotifySubscribers( \
96+
GCallStreamID, (uint16_t)xpti::trace_point_type_t::function_begin, \
97+
GCallEvent, nullptr, CallCorrelationID, FuncName); \
98+
} \
99+
if (xptiCheckTraceEnabled( \
100+
GDebugStreamID, \
101+
(uint16_t)xpti::trace_point_type_t::function_with_args_begin)) { \
102+
DebugCorrelationID = xptiGetUniqueId(); \
103+
uint32_t FuncID = static_cast<uint32_t>(ZEApiKind::call); \
104+
xpti::function_with_args_t Payload{FuncID, FuncName, Params, nullptr, \
105+
nullptr}; \
106+
xptiNotifySubscribers( \
107+
GDebugStreamID, \
108+
(uint16_t)xpti::trace_point_type_t::function_with_args_begin, \
109+
GDebugEvent, nullptr, DebugCorrelationID, &Payload); \
110+
} \
102111
} \
103112
}; \
104113
Epilogue.domain.cb = [](params_type *Params, ze_result_t Result, void *, \
105114
void **) { \
106115
if (xptiTraceEnabled()) { \
107-
uint8_t CallStreamID = xptiRegisterStream(ZE_CALL_STREAM_NAME); \
108-
uint8_t DebugStreamID = xptiRegisterStream(ZE_DEBUG_STREAM_NAME); \
109116
const char *FuncName = #call; \
110-
xptiNotifySubscribers(CallStreamID, \
111-
(uint16_t)xpti::trace_point_type_t::function_end, \
112-
GCallEvent, nullptr, CallCorrelationID, FuncName); \
113-
uint32_t FuncID = static_cast<uint32_t>(ZEApiKind::call); \
114-
xpti::function_with_args_t Payload{FuncID, FuncName, Params, &Result, \
115-
nullptr}; \
116-
xptiNotifySubscribers( \
117-
DebugStreamID, \
118-
(uint16_t)xpti::trace_point_type_t::function_with_args_end, \
119-
GDebugEvent, nullptr, DebugCorrelationID, &Payload); \
117+
if (xptiCheckTraceEnabled( \
118+
GCallStreamID, \
119+
(uint16_t)xpti::trace_point_type_t::function_end)) { \
120+
xptiNotifySubscribers( \
121+
GCallStreamID, (uint16_t)xpti::trace_point_type_t::function_end, \
122+
GCallEvent, nullptr, CallCorrelationID, FuncName); \
123+
} \
124+
if (xptiCheckTraceEnabled( \
125+
GDebugStreamID, \
126+
(uint16_t)xpti::trace_point_type_t::function_with_args_end)) { \
127+
uint32_t FuncID = static_cast<uint32_t>(ZEApiKind::call); \
128+
xpti::function_with_args_t Payload{FuncID, FuncName, Params, &Result, \
129+
nullptr}; \
130+
xptiNotifySubscribers( \
131+
GDebugStreamID, \
132+
(uint16_t)xpti::trace_point_type_t::function_with_args_end, \
133+
GDebugEvent, nullptr, DebugCorrelationID, &Payload); \
134+
} \
120135
} \
121136
};
122137

sycl/source/detail/plugin.hpp

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ namespace detail {
2828
#ifdef XPTI_ENABLE_INSTRUMENTATION
2929
extern xpti::trace_event_data_t *GPICallEvent;
3030
extern xpti::trace_event_data_t *GPIArgCallEvent;
31+
extern uint8_t PiCallStreamID;
32+
extern uint8_t PiDebugCallStreamID;
3133
#endif
3234

3335
template <PiApiKind Kind, size_t Idx, typename... Args>
@@ -173,24 +175,39 @@ class plugin {
173175
sycl::detail::pi::PiResult call_nocheck(ArgsT... Args) const {
174176
sycl::detail::pi::PiFuncInfo<PiApiOffset> PiCallInfo;
175177
#ifdef XPTI_ENABLE_INSTRUMENTATION
178+
bool CorrelationIDAvailable = false, CorrelationIDWithArgsAvailable = false;
176179
// Emit a function_begin trace for the PI API before the call is executed.
177180
// If arguments need to be captured, then a data structure can be sent in
178181
// the per_instance_user_data field.
179182
const char *PIFnName = PiCallInfo.getFuncName();
180-
uint64_t CorrelationID = pi::emitFunctionBeginTrace(PIFnName);
181-
uint64_t CorrelationIDWithArgs = 0;
183+
uint64_t CorrelationIDWithArgs = 0, CorrelationID = 0;
184+
185+
if (xptiCheckTraceEnabled(
186+
PiCallStreamID,
187+
(uint16_t)xpti::trace_point_type_t::function_begin)) {
188+
CorrelationID = pi::emitFunctionBeginTrace(PIFnName);
189+
CorrelationIDAvailable = true;
190+
}
182191
unsigned char *ArgsDataPtr = nullptr;
183-
using PackCallArgumentsTy =
184-
decltype(packCallArguments<PiApiOffset>(std::forward<ArgsT>(Args)...));
185-
auto ArgsData =
186-
xptiTraceEnabled()
187-
? packCallArguments<PiApiOffset>(std::forward<ArgsT>(Args)...)
188-
: PackCallArgumentsTy{};
189-
// TODO check if stream is observed when corresponding API is present.
190-
if (xptiTraceEnabled()) {
192+
// If subscribers are listening to Pi debug call stream, only then prepare
193+
// the data for the notifications and emit notifications. Even though the
194+
// function emitFunctionWithArgsBeginTrace() checks for the trqace typoe
195+
// using xptiTraceCheckEnabled(), we add a guard here before we prepare the
196+
// data for the notification, as it comes with a cost
197+
if (xptiCheckTraceEnabled(
198+
PiDebugCallStreamID,
199+
(uint16_t)xpti::trace_point_type_t::function_with_args_begin)) {
200+
using PackCallArgumentsTy = decltype(packCallArguments<PiApiOffset>(
201+
std::forward<ArgsT>(Args)...));
202+
auto ArgsData =
203+
xptiTraceEnabled()
204+
? packCallArguments<PiApiOffset>(std::forward<ArgsT>(Args)...)
205+
: PackCallArgumentsTy{};
206+
// TODO check if stream is observed when corresponding API is present.
191207
ArgsDataPtr = ArgsData.data();
192208
CorrelationIDWithArgs = pi::emitFunctionWithArgsBeginTrace(
193209
static_cast<uint32_t>(PiApiOffset), PIFnName, ArgsDataPtr, *MPlugin);
210+
CorrelationIDWithArgsAvailable = true;
194211
}
195212
#endif
196213
sycl::detail::pi::PiResult R = PI_SUCCESS;
@@ -216,11 +233,18 @@ class plugin {
216233
}
217234
}
218235
#ifdef XPTI_ENABLE_INSTRUMENTATION
219-
// Close the function begin with a call to function end
220-
pi::emitFunctionEndTrace(CorrelationID, PIFnName);
221-
pi::emitFunctionWithArgsEndTrace(CorrelationIDWithArgs,
222-
static_cast<uint32_t>(PiApiOffset),
223-
PIFnName, ArgsDataPtr, R, *MPlugin);
236+
// Close the function begin with a call to function end; we do not need to
237+
// check th xptiTraceCheckEnbled() here as it is performed within the
238+
// function
239+
if (CorrelationIDAvailable) {
240+
// Only send function_end notification if function_begin is subscribed to
241+
pi::emitFunctionEndTrace(CorrelationID, PIFnName);
242+
}
243+
if (CorrelationIDWithArgsAvailable) {
244+
pi::emitFunctionWithArgsEndTrace(CorrelationIDWithArgs,
245+
static_cast<uint32_t>(PiApiOffset),
246+
PIFnName, ArgsDataPtr, R, *MPlugin);
247+
}
224248
#endif
225249
return R;
226250
}

0 commit comments

Comments
 (0)