Skip to content

[SYCL] Wrap complex global objects to control lifetime #2516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Oct 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions sycl/include/CL/sycl/detail/spinlock.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//==-------------------- spinlock.hpp --- Spin lock ------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/detail/defines.hpp>

#include <atomic>
#include <thread>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
/// SpinLock is a synchronization primitive that uses an atomic flag and makes
/// a thread that tries to acquire the lock wait in a loop, repeatedly checking
/// whether the lock is available.
///
/// The lock state is a std::atomic_flag initialized with ATOMIC_FLAG_INIT,
/// i.e. the lock starts out released. The class declares no constructor or
/// destructor of its own, which is intended to make it usable in global
/// context (unlike std::mutex, which doesn't provide such guarantees).
///
/// The class provides lock(), try_lock() and unlock(), so it can be used with
/// std::lock_guard, std::unique_lock and std::try_lock.
class SpinLock {
public:
  /// Attempts to acquire the lock without waiting.
  /// \return true if the lock was acquired by this call, false if it was
  /// already held.
  bool try_lock() { return !MLock.test_and_set(std::memory_order_acquire); }

  /// Acquires the lock, yielding the current thread between attempts while
  /// the lock is held by someone else.
  void lock() {
    while (MLock.test_and_set(std::memory_order_acquire))
      std::this_thread::yield();
  }

  /// Releases the lock.
  void unlock() { MLock.clear(std::memory_order_release); }

private:
  // Set while the lock is held, clear otherwise.
  std::atomic_flag MLock = ATOMIC_FLAG_INIT;
};
} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ set(SYCL_SOURCES
"detail/event_impl.cpp"
"detail/filter_selector_impl.cpp"
"detail/force_device.cpp"
"detail/global_handler.cpp"
"detail/helpers.cpp"
"detail/handler_proxy.cpp"
"detail/image_accessor_util.cpp"
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <CL/sycl/detail/device_filter.hpp>
#include <CL/sycl/detail/pi.hpp>
#include <CL/sycl/info/info_desc.hpp>
#include <detail/global_handler.hpp>

#include <algorithm>
#include <array>
Expand Down Expand Up @@ -181,8 +182,7 @@ template <> class SYCLConfig<SYCL_DEVICE_FILTER> {

const char *ValStr = BaseT::getRawValue();
if (ValStr) {
static device_filter_list DFL{ValStr};
FilterList = &DFL;
FilterList = &GlobalHandler::instance().getDeviceFilterList(ValStr);
}

// TODO: remove the following code when we remove the support for legacy
Expand Down
139 changes: 139 additions & 0 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//==--------- global_handler.cpp --- Global objects handler ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl/detail/device_filter.hpp>
#include <CL/sycl/detail/spinlock.hpp>
#include <detail/global_handler.hpp>
#include <detail/platform_impl.hpp>
#include <detail/plugin.hpp>
#include <detail/program_manager/program_manager.hpp>
#include <detail/scheduler/scheduler.hpp>

#ifdef WIN32
#include <windows.h>
#endif

#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
// The constructor and destructor are defaulted here, out-of-line, so that the
// std::unique_ptr members of GlobalHandler can be declared in the header with
// types that are only forward-declared there.
GlobalHandler::GlobalHandler() = default;
GlobalHandler::~GlobalHandler() = default;

/// \return the GlobalHandler singleton. The object is allocated on the heap
/// during the first call and only a raw pointer to it is kept in the
/// function-local static, so no destructor runs for it automatically; it is
/// released explicitly by shutdown().
GlobalHandler &GlobalHandler::instance() {
  static GlobalHandler *const Handler{new GlobalHandler()};
  return *Handler;
}

/// \return the global Scheduler, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
Scheduler &GlobalHandler::getScheduler() {
  if (!MScheduler) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MScheduler)
      MScheduler = std::make_unique<Scheduler>();
  }

  return *MScheduler;
}
/// \return the global ProgramManager, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
ProgramManager &GlobalHandler::getProgramManager() {
  if (!MProgramManager) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MProgramManager)
      MProgramManager = std::make_unique<ProgramManager>();
  }

  return *MProgramManager;
}
/// \return the global Sync object, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
Sync &GlobalHandler::getSync() {
  if (!MSync) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MSync)
      MSync = std::make_unique<Sync>();
  }

  return *MSync;
}
std::vector<PlatformImplPtr> &GlobalHandler::getPlatformCache() {
if (MPlatformCache)
return *MPlatformCache;

const std::lock_guard<SpinLock> Lock{MFieldsLock};
if (!MPlatformCache)
MPlatformCache = std::make_unique<std::vector<PlatformImplPtr>>();

return *MPlatformCache;
}
/// \return the global platform map mutex, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
std::mutex &GlobalHandler::getPlatformMapMutex() {
  if (!MPlatformMapMutex) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MPlatformMapMutex)
      MPlatformMapMutex = std::make_unique<std::mutex>();
  }

  return *MPlatformMapMutex;
}
/// \return the global filter mutex, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
std::mutex &GlobalHandler::getFilterMutex() {
  if (!MFilterMutex) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MFilterMutex)
      MFilterMutex = std::make_unique<std::mutex>();
  }

  return *MFilterMutex;
}
std::vector<plugin> &GlobalHandler::getPlugins() {
if (MPlugins)
return *MPlugins;

const std::lock_guard<SpinLock> Lock{MFieldsLock};
if (!MPlugins)
MPlugins = std::make_unique<std::vector<plugin>>();

return *MPlugins;
}
/// \param InitValue string used to construct the device filter list. It is
/// consumed only by the call that creates the list; once the list exists the
/// argument is ignored.
/// \return the global device filter list, creating it on the first call.
///
/// Double-checked creation: the pointer is tested once without the lock and
/// again under MFieldsLock before the object is constructed.
/// NOTE(review): the first test reads a plain std::unique_ptr outside the lock
/// while another thread may be assigning it - confirm this is acceptable.
device_filter_list &
GlobalHandler::getDeviceFilterList(const std::string &InitValue) {
  if (!MDeviceFilterList) {
    const std::lock_guard<SpinLock> Guard{MFieldsLock};
    // Re-check under the lock: another thread may have created the object.
    if (!MDeviceFilterList)
      MDeviceFilterList = std::make_unique<device_filter_list>(InitValue);
  }

  return *MDeviceFilterList;
}

void shutdown() { delete &GlobalHandler::instance(); }

#ifdef WIN32
/// DLL entry point. Releases the global objects when the process detaches
/// from the library; every other notification is ignored.
/// \return TRUE for every notification.
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
  // DLL_PROCESS_ATTACH, DLL_THREAD_ATTACH and DLL_THREAD_DETACH require no
  // action; only process detach triggers the cleanup.
  if (fdwReason == DLL_PROCESS_DETACH)
    shutdown();

  return TRUE; // Successful DLL_PROCESS_ATTACH.
}
#else
// Library unload hook for non-Windows builds: releases the global objects.
//
// For destructor functions a larger priority number runs earlier, so a small
// number is needed for this hook to run late. Priorities 0-100 are reserved
// for the implementation; 110 keeps the hook after destructor functions that
// use a larger (or the default) priority, while leaving 101-109 free for code
// that has to run after the runtime library has been deinitialized.
// NOTE(review): ordering relative to ordinary C++ static-object destructors
// is not controlled by this attribute - confirm on the supported toolchains.
__attribute__((destructor(110))) static void syclUnload() { shutdown(); }
#endif
} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
80 changes: 80 additions & 0 deletions sycl/source/detail/global_handler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//==--------- global_handler.hpp --- Global objects handler ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/detail/spinlock.hpp>
#include <CL/sycl/detail/util.hpp>

#include <memory>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
class platform_impl;
class Scheduler;
class ProgramManager;
class Sync;
class plugin;
class device_filter_list;

using PlatformImplPtr = std::shared_ptr<platform_impl>;

/// Wrapper class for global data structures with non-trivial destructors.
///
/// As user code can call SYCL Runtime functions from destructor of global
/// objects, it is not safe for the runtime library to have global objects with
/// non-trivial destructors. Such destructors can be called any time after
/// exiting main, which may result in user application crashes. Instead,
/// complex global objects must be wrapped into GlobalHandler. Its instance
/// is stored on heap, and deallocated when the runtime library is being
/// unloaded.
///
/// There's no need to store trivial globals here, as no code for their
/// construction or destruction is generated anyway.
class GlobalHandler {
public:
/// \return a reference to a GlobalHandler singleton instance. Memory for
/// storing objects is allocated on first call. The reference is valid as long
/// as runtime library is loaded (i.e. until `DllMain` or
/// `__attribute__((destructor))` is called).
static GlobalHandler &instance();

// The handler is a singleton: copy and move construction are disabled.
GlobalHandler(const GlobalHandler &) = delete;
GlobalHandler(GlobalHandler &&) = delete;

/// Accessors for the wrapped global objects. Each object is created on the
/// first call of its accessor; later calls return the same object.
Scheduler &getScheduler();
ProgramManager &getProgramManager();
Sync &getSync();
std::vector<PlatformImplPtr> &getPlatformCache();
std::mutex &getPlatformMapMutex();
std::mutex &getFilterMutex();
std::vector<plugin> &getPlugins();
/// \param InitValue is used to construct the list on the first call only and
/// is ignored once the list exists.
device_filter_list &getDeviceFilterList(const std::string &InitValue);

private:
// shutdown() deletes the singleton and therefore needs access to the private
// destructor.
friend void shutdown();
// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
GlobalHandler();
~GlobalHandler();

// Held by the accessors while they create the objects stored in this class.
SpinLock MFieldsLock;

// Storage for the wrapped objects. Each pointer stays null until the
// corresponding accessor is called for the first time.
std::unique_ptr<Scheduler> MScheduler;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid truly knowing/enumerating all the objects handled in this wrapper, and just make it be a custom heap where arbitrary objects can be dealt with overloaded new/delete?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@smaslov-intel I was thinking about it. If we go with some custom memory space, we'd need some mechanism to call destructors on that memory. We'd either need to somehow store custom deleters and call them upon shutdown, or implement some other logic, where object is responsible for its destruction. Both look quite complicated solutions. Also, it's not clear how to access these objects. Use names and store pointers in a map? So, I decided to keep global wrapper with unique pointers for now. Adding a new global object shouldn't be that hard, and I don't expect us to add a new global object every other Tuesday as they're code smell anyway.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a new global object shouldn't be that hard

But it is badly violating OOP encapsulation principle, I feel.

Also it makes it nearly impossible to use this global handler in the plugins (standalone parts of SYCL RT).
And then anything else we use for plugins would compete with 65535 destruction priority used for this data.

Having said that I admit this is definitely an improvement and we should go with it until something better is developed.

std::unique_ptr<ProgramManager> MProgramManager;
std::unique_ptr<Sync> MSync;
std::unique_ptr<std::vector<PlatformImplPtr>> MPlatformCache;
std::unique_ptr<std::mutex> MPlatformMapMutex;
std::unique_ptr<std::mutex> MFilterMutex;
std::unique_ptr<std::vector<plugin>> MPlugins;
std::unique_ptr<device_filter_list> MDeviceFilterList;
};
} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
11 changes: 3 additions & 8 deletions sycl/source/detail/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <CL/sycl/detail/device_filter.hpp>
#include <CL/sycl/detail/pi.hpp>
#include <detail/config.hpp>
#include <detail/global_handler.hpp>
#include <detail/plugin.hpp>

#include <bitset>
Expand Down Expand Up @@ -283,18 +284,12 @@ bool trace(TraceLevel Level) {
// Initializes all available Plugins.
const vector_class<plugin> &initialize() {
static std::once_flag PluginsInitDone;
static vector_class<plugin> *Plugins = nullptr;

std::call_once(PluginsInitDone, []() {
// The memory for "Plugins" is intentionally leaked because the application
// may call into the SYCL runtime from a global destructor, and such a call
// could eventually call down to initialize(). Therefore, there is no safe
// time when "Plugins" could be deleted.
Plugins = new vector_class<plugin>;
initializePlugins(Plugins);
initializePlugins(&GlobalHandler::instance().getPlugins());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: why repeat calls here instead of remembering the return value?

});

return *Plugins;
return GlobalHandler::instance().getPlugins();
}

static void initializePlugins(vector_class<plugin> *Plugins) {
Expand Down
13 changes: 8 additions & 5 deletions sycl/source/detail/platform_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
#include <detail/config.hpp>
#include <detail/device_impl.hpp>
#include <detail/force_device.hpp>
#include <detail/global_handler.hpp>
#include <detail/platform_impl.hpp>
#include <detail/platform_info.hpp>

#include <algorithm>
#include <cstring>
#include <regex>
#include <string>
#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
Expand All @@ -32,12 +34,13 @@ PlatformImplPtr platform_impl::getHostPlatformImpl() {

PlatformImplPtr platform_impl::getOrMakePlatformImpl(RT::PiPlatform PiPlatform,
const plugin &Plugin) {
static std::vector<PlatformImplPtr> PlatformCache;
static std::mutex PlatformMapMutex;

PlatformImplPtr Result;
{
const std::lock_guard<std::mutex> Guard(PlatformMapMutex);
const std::lock_guard<std::mutex> Guard(
GlobalHandler::instance().getPlatformMapMutex());

std::vector<PlatformImplPtr> &PlatformCache =
GlobalHandler::instance().getPlatformCache();

// If we've already seen this platform, return the impl
for (const auto &PlatImpl : PlatformCache) {
Expand Down Expand Up @@ -323,7 +326,7 @@ platform_impl::get_devices(info::device_type DeviceType) const {
if (is_host() || DeviceType == info::device_type::host)
return Res;

pi_uint32 NumDevices;
pi_uint32 NumDevices = 0;
const detail::plugin &Plugin = getPlugin();
Plugin.call<PiApiKind::piDevicesGet>(
MPlatform, pi::cast<RT::PiDeviceType>(DeviceType), 0,
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/platform_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ namespace sycl {
// Forward declaration
class device_selector;
class device;
enum class aspect;

namespace detail {
class device_impl;

// TODO: implement extension management for host device
// TODO: implement parameters treatment for host device
Expand Down
5 changes: 2 additions & 3 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <detail/config.hpp>
#include <detail/context_impl.hpp>
#include <detail/device_impl.hpp>
#include <detail/global_handler.hpp>
#include <detail/program_impl.hpp>
#include <detail/program_manager/program_manager.hpp>
#include <detail/spec_constant_impl.hpp>
Expand Down Expand Up @@ -47,9 +48,7 @@ enum BuildState { BS_InProgress, BS_Done, BS_Failed };
static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");

ProgramManager &ProgramManager::getInstance() {
// The singleton ProgramManager instance, uses the "magic static" idiom.
static ProgramManager Instance;
return Instance;
return GlobalHandler::instance().getProgramManager();
}

static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,
Expand Down
Loading