Skip to content

Commit bb4ce61

Browse files
author
Alexander Batashev
authored
[SYCL] Wrap complex global objects to control lifetime (#2516)
The order of global constructor/destructor invocation is undefined. At the same time, users may want to make objects of SYCL classes global. Since there are many global objects inside the SYCL runtime (the scheduler and the program manager, to name a few), they need to be wrapped in a trivial class/structure. The object of that structure is allocated on the heap and deallocated in __attribute__((destructor))/DllMain, which guarantees that SYCL global objects are destroyed after user objects.
1 parent 304067c commit bb4ce61

File tree

16 files changed

+349
-42
lines changed

16 files changed

+349
-42
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//==-------------------- spinlock.hpp --- Spin lock ------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/defines.hpp>
12+
13+
#include <atomic>
14+
#include <thread>
15+
16+
__SYCL_INLINE_NAMESPACE(cl) {
17+
namespace sycl {
18+
namespace detail {
19+
/// SpinLock is a synchronization primitive that uses an atomic variable and
20+
/// makes a thread trying to acquire the lock wait in a loop, repeatedly
21+
/// checking whether the lock is available.
22+
///
23+
/// One important feature of this implementation is that std::atomic_flag can
24+
/// be statically initialized. This allows SpinLock to have a trivial
25+
/// constructor and destructor, which makes it possible to use it in a global
26+
/// context (unlike std::mutex, which doesn't provide such guarantees).
27+
class SpinLock {
28+
public:
29+
void lock() {
30+
while (MLock.test_and_set(std::memory_order_acquire))
31+
std::this_thread::yield();
32+
}
33+
void unlock() { MLock.clear(std::memory_order_release); }
34+
35+
private:
36+
std::atomic_flag MLock{ATOMIC_FLAG_INIT};
37+
};
38+
} // namespace detail
39+
} // namespace sycl
40+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ set(SYCL_SOURCES
114114
"detail/event_impl.cpp"
115115
"detail/filter_selector_impl.cpp"
116116
"detail/force_device.cpp"
117+
"detail/global_handler.cpp"
117118
"detail/helpers.cpp"
118119
"detail/handler_proxy.cpp"
119120
"detail/image_accessor_util.cpp"

sycl/source/detail/config.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <CL/sycl/detail/device_filter.hpp>
1414
#include <CL/sycl/detail/pi.hpp>
1515
#include <CL/sycl/info/info_desc.hpp>
16+
#include <detail/global_handler.hpp>
1617

1718
#include <algorithm>
1819
#include <array>
@@ -181,8 +182,7 @@ template <> class SYCLConfig<SYCL_DEVICE_FILTER> {
181182

182183
const char *ValStr = BaseT::getRawValue();
183184
if (ValStr) {
184-
static device_filter_list DFL{ValStr};
185-
FilterList = &DFL;
185+
FilterList = &GlobalHandler::instance().getDeviceFilterList(ValStr);
186186
}
187187

188188
// TODO: remove the following code when we remove the support for legacy

sycl/source/detail/global_handler.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//==--------- global_handler.cpp --- Global objects handler ----------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <CL/sycl/detail/device_filter.hpp>
10+
#include <CL/sycl/detail/spinlock.hpp>
11+
#include <detail/global_handler.hpp>
12+
#include <detail/platform_impl.hpp>
13+
#include <detail/plugin.hpp>
14+
#include <detail/program_manager/program_manager.hpp>
15+
#include <detail/scheduler/scheduler.hpp>
16+
17+
#ifdef WIN32
18+
#include <windows.h>
19+
#endif
20+
21+
#include <vector>
22+
23+
__SYCL_INLINE_NAMESPACE(cl) {
24+
namespace sycl {
25+
namespace detail {
26+
GlobalHandler::GlobalHandler() = default;
27+
GlobalHandler::~GlobalHandler() = default;
28+
29+
GlobalHandler &GlobalHandler::instance() {
30+
static GlobalHandler *SyclGlobalObjectsHandler = new GlobalHandler();
31+
return *SyclGlobalObjectsHandler;
32+
}
33+
34+
Scheduler &GlobalHandler::getScheduler() {
35+
if (MScheduler)
36+
return *MScheduler;
37+
38+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
39+
if (!MScheduler)
40+
MScheduler = std::make_unique<Scheduler>();
41+
42+
return *MScheduler;
43+
}
44+
ProgramManager &GlobalHandler::getProgramManager() {
45+
if (MProgramManager)
46+
return *MProgramManager;
47+
48+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
49+
if (!MProgramManager)
50+
MProgramManager = std::make_unique<ProgramManager>();
51+
52+
return *MProgramManager;
53+
}
54+
Sync &GlobalHandler::getSync() {
55+
if (MSync)
56+
return *MSync;
57+
58+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
59+
if (!MSync)
60+
MSync = std::make_unique<Sync>();
61+
62+
return *MSync;
63+
}
64+
std::vector<PlatformImplPtr> &GlobalHandler::getPlatformCache() {
65+
if (MPlatformCache)
66+
return *MPlatformCache;
67+
68+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
69+
if (!MPlatformCache)
70+
MPlatformCache = std::make_unique<std::vector<PlatformImplPtr>>();
71+
72+
return *MPlatformCache;
73+
}
74+
std::mutex &GlobalHandler::getPlatformMapMutex() {
75+
if (MPlatformMapMutex)
76+
return *MPlatformMapMutex;
77+
78+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
79+
if (!MPlatformMapMutex)
80+
MPlatformMapMutex = std::make_unique<std::mutex>();
81+
82+
return *MPlatformMapMutex;
83+
}
84+
std::mutex &GlobalHandler::getFilterMutex() {
85+
if (MFilterMutex)
86+
return *MFilterMutex;
87+
88+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
89+
if (!MFilterMutex)
90+
MFilterMutex = std::make_unique<std::mutex>();
91+
92+
return *MFilterMutex;
93+
}
94+
std::vector<plugin> &GlobalHandler::getPlugins() {
95+
if (MPlugins)
96+
return *MPlugins;
97+
98+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
99+
if (!MPlugins)
100+
MPlugins = std::make_unique<std::vector<plugin>>();
101+
102+
return *MPlugins;
103+
}
104+
device_filter_list &
105+
GlobalHandler::getDeviceFilterList(const std::string &InitValue) {
106+
if (MDeviceFilterList)
107+
return *MDeviceFilterList;
108+
109+
const std::lock_guard<SpinLock> Lock{MFieldsLock};
110+
if (!MDeviceFilterList)
111+
MDeviceFilterList = std::make_unique<device_filter_list>(InitValue);
112+
113+
return *MDeviceFilterList;
114+
}
115+
116+
void shutdown() { delete &GlobalHandler::instance(); }
117+
118+
#ifdef WIN32
119+
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
120+
// Perform actions based on the reason for calling.
121+
switch (fdwReason) {
122+
case DLL_PROCESS_DETACH:
123+
shutdown();
124+
break;
125+
case DLL_PROCESS_ATTACH:
126+
case DLL_THREAD_ATTACH:
127+
case DLL_THREAD_DETACH:
128+
break;
129+
}
130+
return TRUE; // Successful DLL_PROCESS_ATTACH.
131+
}
132+
#else
133+
// Priority 65535 is meant to make this run after all other global destructors
134+
// (NOTE(review): GCC runs larger-numbered destructors first -- confirm).
135+
__attribute__((destructor(65535))) static void syclUnload() { shutdown(); }
136+
#endif
137+
} // namespace detail
138+
} // namespace sycl
139+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/global_handler.hpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//==--------- global_handler.hpp --- Global objects handler ----------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/spinlock.hpp>
12+
#include <CL/sycl/detail/util.hpp>
13+
14+
#include <memory>
15+
16+
__SYCL_INLINE_NAMESPACE(cl) {
17+
namespace sycl {
18+
namespace detail {
19+
class platform_impl;
20+
class Scheduler;
21+
class ProgramManager;
22+
class Sync;
23+
class plugin;
24+
class device_filter_list;
25+
26+
using PlatformImplPtr = std::shared_ptr<platform_impl>;
27+
28+
/// Wrapper class for global data structures with non-trivial destructors.
29+
///
30+
/// As user code can call SYCL Runtime functions from destructor of global
31+
/// objects, it is not safe for the runtime library to have global objects with
32+
/// non-trivial destructors. Such destructors can be called any time after
33+
/// exiting main, which may result in user application crashes. Instead,
34+
/// complex global objects must be wrapped into GlobalHandler. Its instance
35+
/// is stored on the heap, and deallocated when the runtime library is being
36+
/// unloaded.
37+
///
38+
/// There's no need to store trivial globals here, as no code for their
39+
/// construction or destruction is generated anyway.
40+
class GlobalHandler {
41+
public:
42+
/// \return a reference to a GlobalHandler singleton instance. Memory for
43+
/// storing objects is allocated on first call. The reference is valid as long
44+
/// as the runtime library is loaded (i.e. until `DllMain` or
45+
/// `__attribute__((destructor))` is called).
46+
static GlobalHandler &instance();
47+
48+
GlobalHandler(const GlobalHandler &) = delete;
49+
GlobalHandler(GlobalHandler &&) = delete;
50+
51+
Scheduler &getScheduler();
52+
ProgramManager &getProgramManager();
53+
Sync &getSync();
54+
std::vector<PlatformImplPtr> &getPlatformCache();
55+
std::mutex &getPlatformMapMutex();
56+
std::mutex &getFilterMutex();
57+
std::vector<plugin> &getPlugins();
58+
device_filter_list &getDeviceFilterList(const std::string &InitValue);
59+
60+
private:
61+
friend void shutdown();
62+
// Constructor and destructor are declared out-of-line to allow incomplete
63+
// types as template arguments to unique_ptr.
64+
GlobalHandler();
65+
~GlobalHandler();
66+
67+
SpinLock MFieldsLock;
68+
69+
std::unique_ptr<Scheduler> MScheduler;
70+
std::unique_ptr<ProgramManager> MProgramManager;
71+
std::unique_ptr<Sync> MSync;
72+
std::unique_ptr<std::vector<PlatformImplPtr>> MPlatformCache;
73+
std::unique_ptr<std::mutex> MPlatformMapMutex;
74+
std::unique_ptr<std::mutex> MFilterMutex;
75+
std::unique_ptr<std::vector<plugin>> MPlugins;
76+
std::unique_ptr<device_filter_list> MDeviceFilterList;
77+
};
78+
} // namespace detail
79+
} // namespace sycl
80+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/pi.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <CL/sycl/detail/device_filter.hpp>
1818
#include <CL/sycl/detail/pi.hpp>
1919
#include <detail/config.hpp>
20+
#include <detail/global_handler.hpp>
2021
#include <detail/plugin.hpp>
2122

2223
#include <bitset>
@@ -283,18 +284,12 @@ bool trace(TraceLevel Level) {
283284
// Initializes all available Plugins.
284285
const vector_class<plugin> &initialize() {
285286
static std::once_flag PluginsInitDone;
286-
static vector_class<plugin> *Plugins = nullptr;
287287

288288
std::call_once(PluginsInitDone, []() {
289-
// The memory for "Plugins" is intentionally leaked because the application
290-
// may call into the SYCL runtime from a global destructor, and such a call
291-
// could eventually call down to initialize(). Therefore, there is no safe
292-
// time when "Plugins" could be deleted.
293-
Plugins = new vector_class<plugin>;
294-
initializePlugins(Plugins);
289+
initializePlugins(&GlobalHandler::instance().getPlugins());
295290
});
296291

297-
return *Plugins;
292+
return GlobalHandler::instance().getPlugins();
298293
}
299294

300295
static void initializePlugins(vector_class<plugin> *Plugins) {

sycl/source/detail/platform_impl.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010
#include <detail/config.hpp>
1111
#include <detail/device_impl.hpp>
1212
#include <detail/force_device.hpp>
13+
#include <detail/global_handler.hpp>
1314
#include <detail/platform_impl.hpp>
1415
#include <detail/platform_info.hpp>
1516

1617
#include <algorithm>
1718
#include <cstring>
1819
#include <regex>
1920
#include <string>
21+
#include <vector>
2022

2123
__SYCL_INLINE_NAMESPACE(cl) {
2224
namespace sycl {
@@ -32,12 +34,13 @@ PlatformImplPtr platform_impl::getHostPlatformImpl() {
3234

3335
PlatformImplPtr platform_impl::getOrMakePlatformImpl(RT::PiPlatform PiPlatform,
3436
const plugin &Plugin) {
35-
static std::vector<PlatformImplPtr> PlatformCache;
36-
static std::mutex PlatformMapMutex;
37-
3837
PlatformImplPtr Result;
3938
{
40-
const std::lock_guard<std::mutex> Guard(PlatformMapMutex);
39+
const std::lock_guard<std::mutex> Guard(
40+
GlobalHandler::instance().getPlatformMapMutex());
41+
42+
std::vector<PlatformImplPtr> &PlatformCache =
43+
GlobalHandler::instance().getPlatformCache();
4144

4245
// If we've already seen this platform, return the impl
4346
for (const auto &PlatImpl : PlatformCache) {
@@ -323,7 +326,7 @@ platform_impl::get_devices(info::device_type DeviceType) const {
323326
if (is_host() || DeviceType == info::device_type::host)
324327
return Res;
325328

326-
pi_uint32 NumDevices;
329+
pi_uint32 NumDevices = 0;
327330
const detail::plugin &Plugin = getPlugin();
328331
Plugin.call<PiApiKind::piDevicesGet>(
329332
MPlatform, pi::cast<RT::PiDeviceType>(DeviceType), 0,

sycl/source/detail/platform_impl.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ namespace sycl {
2020
// Forward declaration
2121
class device_selector;
2222
class device;
23+
enum class aspect;
2324

2425
namespace detail {
26+
class device_impl;
2527

2628
// TODO: implement extension management for host device
2729
// TODO: implement parameters treatment for host device

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <detail/config.hpp>
2020
#include <detail/context_impl.hpp>
2121
#include <detail/device_impl.hpp>
22+
#include <detail/global_handler.hpp>
2223
#include <detail/program_impl.hpp>
2324
#include <detail/program_manager/program_manager.hpp>
2425
#include <detail/spec_constant_impl.hpp>
@@ -47,9 +48,7 @@ enum BuildState { BS_InProgress, BS_Done, BS_Failed };
4748
static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
4849

4950
ProgramManager &ProgramManager::getInstance() {
50-
// The singleton ProgramManager instance, uses the "magic static" idiom.
51-
static ProgramManager Instance;
52-
return Instance;
51+
return GlobalHandler::instance().getProgramManager();
5352
}
5453

5554
static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

0 commit comments

Comments
 (0)