Skip to content

Commit 435eb7f

Browse files
authored
[XPTI][INFRA] Fixes minor standalone build and code issues (#18452)
This PR is first of many XPTI changes to address the performance issues that have been reported. (1) xpti::Framework::instance() - testing the use of std::call_once against current implementation (2) Added cleanup for default stream "xpti.framework" (3) Switched to EmHash instead of std::unordered_map that improve notification performance by 10% Most of the performance overheads are from xptiRegisterObject() and xptiAddMetadata(). This requires changes in the SYCL runtime an will be addressed in a subsequent PR --------- Signed-off-by: Vasanth Tovinkere <[email protected]>
1 parent b6e0d2e commit 435eb7f

File tree

6 files changed

+51
-20
lines changed

6 files changed

+51
-20
lines changed

xptifw/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.20.0)
22

3-
set(XPTI_VERSION 1.0.0)
3+
set(XPTI_VERSION 1.0.1)
44

55
project (xptifw VERSION "${XPTI_VERSION}" LANGUAGES CXX)
66
set(CMAKE_CXX_STANDARD 17)
@@ -74,8 +74,8 @@ endif()
7474

7575
if (XPTI_BUILD_SAMPLES)
7676
add_subdirectory(samples/basic_collector)
77-
add_subdirectory(samples/syclpi_collector)
7877
add_subdirectory(samples/sycl_perf_collector)
78+
add_subdirectory(samples/syclur_collector)
7979
add_subdirectory(basic_test)
8080
endif()
8181

xptifw/CMakeLists.txt.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 2.8.2)
1+
cmake_minimum_required(VERSION 3.20.0)
22

33
project(googletest-download NONE)
44

xptifw/samples/sycl_perf_collector/sycl_perf_collector.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ event_instances_t *GRecordsInProgress = nullptr;
4141
xpti::utils::timer::measurement_t GMeasure;
4242

4343
constexpr const char *GStreamBasic = "sycl";
44+
constexpr const char *GStreamPerf = "sycl.perf";
4445
constexpr const char *GStreamPI = "ur.call";
4546
constexpr const char *GStreamMemory = "sycl.experimental.mem_alloc";
4647
constexpr const char *GStreamL0 = "sycl.experimental.level_zero.call";
@@ -291,12 +292,14 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version,
291292
// In order to determine if the environment variable contains valid stream
292293
// names, a catalog of all streams must be check against to validate the
293294
// strings
294-
if (ShowVerboseOutput && StreamsToMonitor) {
295-
std::cout << "Monitoring streams: ";
295+
if (StreamsToMonitor) {
296+
if (ShowVerboseOutput)
297+
std::cout << "Monitoring streams: ";
296298

297299
auto streams = D.decode(StreamsToMonitor);
298300
for (auto &s : streams) {
299-
std::cout << s << ", ";
301+
if (ShowVerboseOutput)
302+
std::cout << s << ", ";
300303
GAllStreams.add(s.c_str());
301304
}
302305
if (ShowVerboseOutput)
@@ -394,17 +397,17 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version,
394397
InitStreams = false;
395398
} // First time inititalization complete
396399

397-
if (ShowVerboseOutput)
398-
std::cout << "Initializing stream: " << stream_name << "\n";
399-
400400
// Post initialization: Once the needed data structures are created, we
401401
// register callbacks to the streams requsted by the end-user;
402402
//
403403
// Check == TRUE if the stream has been requested by end-user
404404
//
405405
auto Check = GAllStreams.check(stream_name);
406-
if (Check)
406+
if (Check) {
407407
GStreams->add(stream_name);
408+
if (ShowVerboseOutput)
409+
std::cout << "Initializing stream: " << stream_name << "\n";
410+
}
408411

409412
if (std::string(GStreamBasic) == stream_name && Check) {
410413
auto StreamID = xptiRegisterStream(stream_name);
@@ -436,10 +439,10 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version,
436439
// Handles SelfNotification events
437440
xptiRegisterCallback(StreamID,
438441
(uint16_t)xpti::trace_point_type_t::function_begin,
439-
syclPiCallback);
442+
syclURCallback);
440443
xptiRegisterCallback(StreamID,
441444
(uint16_t)xpti::trace_point_type_t::function_end,
442-
syclPiCallback);
445+
syclURCallback);
443446
} else if (std::string(GStreamMemory) == stream_name && Check) {
444447
auto StreamID = xptiRegisterStream(stream_name);
445448
xptiRegisterCallback(StreamID,
@@ -468,6 +471,14 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version,
468471
xptiRegisterCallback(StreamID,
469472
(uint16_t)xpti::trace_point_type_t::function_end,
470473
syclURCallback);
474+
} else if (std::string(GStreamPerf) == stream_name && Check) {
475+
auto StreamID = xptiRegisterStream(stream_name);
476+
xptiRegisterCallback(StreamID,
477+
(uint16_t)xpti::trace_point_type_t::function_begin,
478+
syclURCallback);
479+
xptiRegisterCallback(StreamID,
480+
(uint16_t)xpti::trace_point_type_t::function_end,
481+
syclURCallback);
471482
} else if (std::string(GStreamL0) == stream_name && Check) {
472483
auto StreamID = xptiRegisterStream(stream_name);
473484
xptiRegisterCallback(StreamID,
@@ -515,7 +526,7 @@ std::once_flag GFinalize, GCompaction;
515526
// terminated sending events in its stream
516527
XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) {
517528
{
518-
if (ShowVerboseOutput)
529+
if (ShowVerboseOutput && GStreams->check(stream_name))
519530
std::cout << "Unregistering stream: " << stream_name << "\n";
520531
std::lock_guard<std::mutex> _{GStreamMutex};
521532
// Filter the streams to what is requested by the user that intersects with

xptifw/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Fetch third party headers
2+
include(FetchContent)
23
set(EMHASH_REPO https://github.com/ktprime/emhash)
34
message(STATUS "Will fetch emhash from ${EMHASH_REPO}")
45
FetchContent_Declare(emhash-headers

xptifw/src/xpti_trace_framework.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,16 @@ static thread_local xpti_tracepoint_t *g_tls_tracepoint_scope_data;
8282
/// Default stream for the trace framework, if none is provided by the user.
8383
constexpr const char *g_default_stream = "xpti.framework";
8484

85+
/// @brief Flag to ensure the default stream is initialized only once.
86+
/// @details Used with `std::call_once` to initialize the default stream in a
87+
/// thread-safe manner.
8588
static std::once_flag g_initialize_default_stream_flag;
8689

90+
/// @brief Flag to ensure the default stream is finalized only once.
91+
/// @details Used with `std::call_once` to finalize the default stream in a
92+
/// thread-safe manner.
93+
static std::once_flag g_finalize_default_stream_flag;
94+
8795
namespace xpti {
8896
/// @var env_subscribers
8997
/// @brief A constant character pointer initialized with the string
@@ -1379,15 +1387,15 @@ class Notifications {
13791387
/// @brief Maps a trace type to its associated callback entries.
13801388
/// @details This unordered map uses a uint16_t as the key to represent the
13811389
/// trace point type, and cb_entries_t to store the associated callbacks.
1382-
using cb_t = std::unordered_map<uint16_t, cb_entries_t>;
1390+
using cb_t = emhash7::HashMap<uint16_t, cb_entries_t>;
13831391

13841392
/// @typedef stream_cb_t
13851393
/// @brief Maps a stream ID to its corresponding callbacks for different
13861394
/// trace types
13871395
/// @details This unordered map uses a uint16_t as the key for the stream
13881396
/// ID, and cb_t to map the stream to registered callbacks for each trace
13891397
/// type
1390-
using stream_cb_t = std::unordered_map<uint16_t, cb_t>;
1398+
using stream_cb_t = emhash7::HashMap<uint16_t, cb_t>;
13911399

13921400
/// @typedef statistics_t
13931401
/// @brief Keeps track of statistics, typically counts, associated with
@@ -1396,13 +1404,13 @@ class Notifications {
13961404
/// the type of statistical data and usually not defined by default. To
13971405
/// enable it, XPTI_STATISTICS has to be defined while compiling the
13981406
/// frmaework library.
1399-
using statistics_t = std::unordered_map<uint16_t, uint64_t>;
1407+
using statistics_t = emhash7::HashMap<uint16_t, uint64_t>;
14001408
/// @typedef trace_flags_t
14011409
/// @brief Maps an trace type to a boolean flag indicating its state.
14021410
/// @details This unordered map uses a uint16_t as the key for the trace
14031411
/// type, and a boolean value to indicate whether callbacks are registered
14041412
/// for this trace type (e.g., registered or unregisterted/no callback).
1405-
using trace_flags_t = std::unordered_map<uint16_t, bool>;
1413+
using trace_flags_t = emhash7::HashMap<uint16_t, bool>;
14061414

14071415
/// @typedef stream_flags_t
14081416
/// @brief Maps a stream ID to its corresponding trace flags for different
@@ -1411,7 +1419,7 @@ class Notifications {
14111419
/// and trace_flags_t to map the trace type to their boolean that indiciates
14121420
/// whether a callback has been registered for this trace type in the given
14131421
/// stream.
1414-
using stream_flags_t = std::unordered_map<uint8_t, trace_flags_t>;
1422+
using stream_flags_t = emhash7::HashMap<uint8_t, trace_flags_t>;
14151423

14161424
/// @brief Registers a callback function for a specific trace type and stream
14171425
/// ID.
@@ -2290,6 +2298,9 @@ class Framework {
22902298
}
22912299

22922300
static Framework &instance() {
2301+
// Using std::call_once has the same overhead as the original approach
2302+
// std::call_once(g_initialize_framework_flag,
2303+
// [&]() { MInstance = new Framework(); });
22932304
Framework *TmpFramework = MInstance.load(std::memory_order_relaxed);
22942305
std::atomic_thread_fence(std::memory_order_acquire);
22952306
if (TmpFramework == nullptr) {
@@ -2309,6 +2320,11 @@ class Framework {
23092320
friend void ::xptiFrameworkFinalize();
23102321

23112322
static void release() {
2323+
// Using std::call_once has the same overhead as the original approach
2324+
// std::call_once(g_release_framework_flag, [&]() {
2325+
// delete MInstance;
2326+
// MInstance = nullptr;
2327+
// });
23122328
Framework *TmpFramework = MInstance.load(std::memory_order_relaxed);
23132329
MInstance.store(nullptr, std::memory_order_relaxed);
23142330
delete TmpFramework;
@@ -2482,7 +2498,10 @@ XPTI_EXPORT_API xpti::result_t xptiInitialize(const char *Stream, uint32_t maj,
24822498
/// when the stream is no longer in use.
24832499

24842500
XPTI_EXPORT_API void xptiFinalize(const char *Stream) {
2485-
xpti::Framework::instance().finalizeStream(Stream);
2501+
auto &FW = xpti::Framework::instance();
2502+
std::call_once(g_finalize_default_stream_flag,
2503+
[&]() { FW.finalizeStream(g_default_stream); });
2504+
FW.finalizeStream(Stream);
24862505
}
24872506

24882507
/// @brief Retrieves the 64-bit universal ID for the current scope, if published

xptifw/unit_test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ include_directories(${XPTI_DIR}/include)
77
if (NOT DEFINED LLVM_EXTERNAL_XPTIFW_SOURCE_DIR)
88
message(STATUS "Building XPTI outside of LLVM Project...")
99
# Download and unpack googletest at configure time
10-
configure_file(../CMakeLists.txt.in googletest-download/CMakeLists.txt)
10+
configure_file(${XPTIFW_DIR}/CMakeLists.txt.in googletest-download/CMakeLists.txt)
1111
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
1212
RESULT_VARIABLE result
1313
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )

0 commit comments

Comments
 (0)