Skip to content

Commit fe07d0e

Browse files
[SYCL] Windows Proxy Loader for DLLs (#8242)
DLLs manually loaded by SYCL are not tracked as direct dependencies in the same way that linked DLLs are. This means these DLL may be unloaded before SYCLs shutdown() routine is called, which will lead to problems when that routine tries to call those DLL to release resources. This PR adds a new proxy DLL that is a linked dependency of SYCL itself. This proxy DLL loads all the SYCL manually loaded DLLs early, before SYCL itself is loaded, and conversely, they are not unloaded until the proxy itself unloads, which is after SYCL unloads. So now the manually loaded plugin DLLs will be resident when shutdown() is called and piTearDown can complete safely and successfully. I had a previous PR for this work ( #7756 ), but it encountered interference with a difference in how Windows handles threads and their termination. I'm addressing that problem separately. In this version, I am reducing the shutdown() procedure on Windows to only release the plugins and nothing else. This avoids the issue for now. Tests are at intel/llvm-test-suite#1465 --------- Signed-off-by: Chris Perkins <[email protected]>
1 parent 8382e58 commit fe07d0e

File tree

19 files changed

+462
-29
lines changed

19 files changed

+462
-29
lines changed

sycl/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ add_custom_target( sycl-toolchain
314314
DEPENDS sycl-runtime-libraries
315315
sycl-compiler
316316
sycl-ls
317+
win_proxy_loader
317318
${XPTIFW_LIBS}
318319
COMMENT "Building SYCL compiler toolchain..."
319320
)
@@ -350,6 +351,8 @@ add_subdirectory( plugins )
350351

351352
add_subdirectory(tools)
352353

354+
add_subdirectory(win_proxy_loader)
355+
353356
if(SYCL_INCLUDE_TESTS)
354357
if(NOT LLVM_INCLUDE_TESTS)
355358
message(FATAL_ERROR
@@ -392,6 +395,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS
392395
sycl
393396
libsycldevice
394397
level-zero-sycl-dev
398+
win_proxy_loader
395399
${XPTIFW_LIBS}
396400
${SYCL_TOOLCHAIN_DEPS}
397401
)

sycl/include/sycl/detail/pi.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ enum TraceLevel {
6262
bool trace(TraceLevel level);
6363

6464
#ifdef __SYCL_RT_OS_WINDOWS
65+
// these same constants are used by win_proxy_loader.dll
66+
// if a plugin is added here, add it there as well.
6567
#ifdef _MSC_VER
6668
#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll"
6769
#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll"
@@ -150,11 +152,11 @@ __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext,
150152

151153
// Function to load the shared library
152154
// Implementation is OS dependent.
153-
void *loadOsLibrary(const std::string &Library);
155+
void *loadOsPluginLibrary(const std::string &Library);
154156

155157
// Function to unload the shared library
156158
// Implementation is OS dependent (see posix-pi.cpp and windows-pi.cpp)
157-
int unloadOsLibrary(void *Library);
159+
int unloadOsPluginLibrary(void *Library);
158160

159161
// OS agnostic function to unload the shared library
160162
int unloadPlugin(void *Library);
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//==------------ common_win_pi_trace.hpp - SYCL standard header file -------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// this .hpp is injected. Be sure to define __SYCL_PLUGIN_DLL_NAME before
10+
// including
11+
#ifdef _WIN32
12+
#include <windows.h>
13+
BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module
14+
DWORD fdwReason, // reason for calling function
15+
LPVOID lpReserved) { // reserved
16+
17+
bool PrintPiTrace = false;
18+
static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
19+
static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
20+
if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
21+
PrintPiTrace = true;
22+
}
23+
24+
// Perform actions based on the reason for calling.
25+
switch (fdwReason) {
26+
case DLL_PROCESS_DETACH:
27+
if (PrintPiTrace)
28+
std::cout << "---> DLL_PROCESS_DETACH " << __SYCL_PLUGIN_DLL_NAME << "\n"
29+
<< std::endl;
30+
31+
break;
32+
case DLL_PROCESS_ATTACH:
33+
if (PrintPiTrace)
34+
std::cout << "---> DLL_PROCESS_ATTACH " << __SYCL_PLUGIN_DLL_NAME << "\n"
35+
<< std::endl;
36+
case DLL_THREAD_ATTACH:
37+
case DLL_THREAD_DETACH:
38+
break;
39+
}
40+
return TRUE;
41+
}
42+
#endif // WIN32

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5553,6 +5553,10 @@ pi_result cuda_piextEnqueueDeviceGlobalVariableRead(
55535553
}
55545554

55555555
// This API is called by Sycl RT to notify the end of the plugin lifetime.
5556+
// Windows: dynamically loaded plugins might have been unloaded already
5557+
// when this is called. Sycl RT holds onto the PI plugin so it can be
5558+
// called safely. But this is not transitive. If the PI plugin in turn
5559+
// dynamically loaded a different DLL, that may have been unloaded.
55565560
// TODO: add a global variable lifetime management code here (see
55575561
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
55585562
pi_result cuda_piTearDown(void *) {
@@ -5745,4 +5749,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
57455749
return PI_SUCCESS;
57465750
}
57475751

5752+
#ifdef _WIN32
5753+
#define __SYCL_PLUGIN_DLL_NAME "pi_cuda.dll"
5754+
#include "../common_win_pi_trace/common_win_pi_trace.hpp"
5755+
#undef __SYCL_PLUGIN_DLL_NAME
5756+
#endif
5757+
57485758
} // extern "C"

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,6 +2048,10 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) {
20482048
return PI_SUCCESS;
20492049
}
20502050

2051+
// Windows: dynamically loaded plugins might have been unloaded already
2052+
// when this is called. Sycl RT holds onto the PI plugin so it can be
2053+
// called safely. But this is not transitive. If the PI plugin in turn
2054+
// dynamically loaded a different DLL, that may have been unloaded.
20512055
pi_result piTearDown(void *) {
20522056
delete reinterpret_cast<sycl::detail::ESIMDEmuPluginOpaqueData *>(
20532057
PiESimdDeviceAccess->data);
@@ -2102,4 +2106,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
21022106
return PI_SUCCESS;
21032107
}
21042108

2109+
#ifdef _WIN32
2110+
#define __SYCL_PLUGIN_DLL_NAME "pi_esimd_emulator.dll"
2111+
#include "../common_win_pi_trace/common_win_pi_trace.hpp"
2112+
#undef __SYCL_PLUGIN_DLL_NAME
2113+
#endif
2114+
21052115
} // extern C

sycl/plugins/hip/pi_hip.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5321,6 +5321,10 @@ pi_result hip_piextEnqueueDeviceGlobalVariableRead(
53215321
}
53225322

53235323
// This API is called by Sycl RT to notify the end of the plugin lifetime.
5324+
// Windows: dynamically loaded plugins might have been unloaded already
5325+
// when this is called. Sycl RT holds onto the PI plugin so it can be
5326+
// called safely. But this is not transitive. If the PI plugin in turn
5327+
// dynamically loaded a different DLL, that may have been unloaded.
53245328
// TODO: add a global variable lifetime management code here (see
53255329
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
53265330
pi_result hip_piTearDown(void *PluginParameter) {
@@ -5513,6 +5517,12 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
55135517
return PI_SUCCESS;
55145518
}
55155519

5520+
#ifdef _WIN32
5521+
#define __SYCL_PLUGIN_DLL_NAME "pi_hip.dll"
5522+
#include "../common_win_pi_trace/common_win_pi_trace.hpp"
5523+
#undef __SYCL_PLUGIN_DLL_NAME
5524+
#endif
5525+
55165526
} // extern "C"
55175527

55185528
hipEvent_t _pi_platform::evBase_{nullptr};

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7986,6 +7986,10 @@ pi_result piextPluginGetOpaqueData(void *opaque_data_param,
79867986
}
79877987

79887988
// SYCL RT calls this api to notify the end of plugin lifetime.
7989+
// Windows: dynamically loaded plugins might have been unloaded already
7990+
// when this is called. Sycl RT holds onto the PI plugin so it can be
7991+
// called safely. But this is not transitive. If the PI plugin in turn
7992+
// dynamically loaded a different DLL, that may have been unloaded.
79897993
// It can include all the jobs to tear down resources before
79907994
// the plugin is unloaded from memory.
79917995
pi_result piTearDown(void *PluginParameter) {
@@ -8369,4 +8373,10 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
83698373
}
83708374
return PI_SUCCESS;
83718375
}
8376+
8377+
#ifdef _WIN32
8378+
#define __SYCL_PLUGIN_DLL_NAME "pi_level_zero.dll"
8379+
#include "../common_win_pi_trace/common_win_pi_trace.hpp"
8380+
#undef __SYCL_PLUGIN_DLL_NAME
8381+
#endif
83728382
} // extern "C"

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,10 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
17451745
}
17461746

17471747
// This API is called by Sycl RT to notify the end of the plugin lifetime.
1748+
// Windows: dynamically loaded plugins might have been unloaded already
1749+
// when this is called. Sycl RT holds onto the PI plugin so it can be
1750+
// called safely. But this is not transitive. If the PI plugin in turn
1751+
// dynamically loaded a different DLL, that may have been unloaded.
17481752
// TODO: add a global variable lifetime management code here (see
17491753
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
17501754
pi_result piTearDown(void *PluginParameter) {
@@ -1941,4 +1945,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
19411945
return PI_SUCCESS;
19421946
}
19431947

1948+
#ifdef _WIN32
1949+
#define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll"
1950+
#include "../common_win_pi_trace/common_win_pi_trace.hpp"
1951+
#undef __SYCL_PLUGIN_DLL_NAME
1952+
#endif
1953+
19441954
} // end extern 'C'

sycl/source/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME)
7070
target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB})
7171
endif()
7272

73+
# win_proxy_loader
74+
include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/win_proxy_loader)
75+
if(WIN_DUPE)
76+
target_link_libraries(${LIB_NAME} PUBLIC win_proxy_loaderd)
77+
else()
78+
target_link_libraries(${LIB_NAME} PUBLIC win_proxy_loader)
79+
endif()
80+
7381
target_compile_definitions(${LIB_OBJ_NAME} PRIVATE __SYCL_INTERNAL_API )
7482

7583
if (WIN32)
@@ -232,11 +240,13 @@ if (MSVC)
232240
string(REGEX REPLACE "/MT" "" ${flag_var} "${${flag_var}}")
233241
endforeach()
234242

243+
set(WIN_DUPE "1")
235244
if (SYCL_ENABLE_XPTI_TRACING)
236245
add_sycl_rt_library(sycl${SYCL_MAJOR_VERSION}d sycld_object XPTI_LIB xptid COMPILE_OPTIONS "/MDd" SOURCES ${SYCL_SOURCES})
237246
else()
238247
add_sycl_rt_library(sycl${SYCL_MAJOR_VERSION}d sycld_object COMPILE_OPTIONS "/MDd" SOURCES ${SYCL_SOURCES})
239248
endif()
249+
unset(WIN_DUPE)
240250
add_library(sycld ALIAS sycl${SYCL_MAJOR_VERSION}d)
241251

242252
set(SYCL_EXTRA_OPTS "/MD")

sycl/source/detail/context_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ context_impl::~context_impl() {
136136
}
137137
if (!MHostContext) {
138138
// TODO catch an exception and put it to list of asynchronous exceptions
139-
getPlugin().call<PiApiKind::piContextRelease>(MContext);
139+
getPlugin().call_nocheck<PiApiKind::piContextRelease>(MContext);
140140
}
141141
}
142142

sycl/source/detail/global_handler.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ void GlobalHandler::releaseDefaultContexts() {
187187
// finished. To avoid calls to nowhere, intentionally leak platform to device
188188
// cache. This will prevent destructors from being called, thus no PI cleanup
189189
// routines will be called in the end.
190+
// Update: the win_proxy_loader addresses this for SYCL's own dependencies,
191+
// but the GPU device dlls seem to manually load yet another DLL which may
192+
// have been released when this function is called. So we still release() and
193+
// leak until that is addressed. context destructs fine on CPU device.
190194
MPlatformToDefaultContextCache.Inst.release();
191195
#endif
192196
}
@@ -234,6 +238,18 @@ void GlobalHandler::drainThreadPool() {
234238
MHostTaskThreadPool.Inst->drain();
235239
}
236240

241+
#ifdef _WIN32
242+
// because of something not-yet-understood on Windows
243+
// threads may be shutdown once the end of main() is reached
244+
// making an orderly shutdown difficult. Fortunately, Windows
245+
// itself is very aggressive about reclaiming memory. Thus,
246+
// we focus solely on unloading the plugins, so as to not
247+
// accidentally retain device handles. etc
248+
void shutdown(){
249+
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
250+
Handler->unloadPlugins();
251+
}
252+
#else
237253
void shutdown() {
238254
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
239255
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
@@ -268,18 +284,36 @@ void shutdown() {
268284
delete Handler;
269285
Handler = nullptr;
270286
}
287+
#endif
271288

272289
#ifdef _WIN32
273290
extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
274291
DWORD fdwReason,
275292
LPVOID lpReserved) {
293+
bool PrintPiTrace = false;
294+
static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
295+
static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
296+
if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
297+
PrintPiTrace = true;
298+
}
299+
276300
// Perform actions based on the reason for calling.
277301
switch (fdwReason) {
278302
case DLL_PROCESS_DETACH:
279-
if (!lpReserved)
280-
shutdown();
303+
if (PrintPiTrace)
304+
std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl;
305+
306+
#ifdef XPTI_ENABLE_INSTRUMENTATION
307+
if (xptiTraceEnabled())
308+
return TRUE; // When doing xpti tracing, we can't safely call shutdown.
309+
// TODO: figure out what XPTI is doing that prevents release.
310+
#endif
311+
312+
shutdown();
281313
break;
282314
case DLL_PROCESS_ATTACH:
315+
if (PrintPiTrace)
316+
std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl;
283317
case DLL_THREAD_ATTACH:
284318
case DLL_THREAD_DETACH:
285319
break;

sycl/source/detail/online_compiler/online_compiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType,
9494
#else
9595
static const std::string OclocLibraryName = "libocloc.so";
9696
#endif
97-
void *OclocLibrary = sycl::detail::pi::loadOsLibrary(OclocLibraryName);
97+
void *OclocLibrary = sycl::detail::pi::loadOsPluginLibrary(OclocLibraryName);
9898
if (!OclocLibrary)
9999
throw online_compile_error("Cannot load ocloc library: " +
100100
OclocLibraryName);

sycl/source/detail/os_util.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ OSModuleHandle OSUtil::getOSModuleHandle(const void *VirtAddr) {
194194
}
195195

196196
/// Returns an absolute path where the object was found.
197+
// win_proxy_loader.dll uses this same logic. If it is changed
198+
// significantly, it might be wise to change it there too.
197199
std::string OSUtil::getCurrentDSODir() {
198200
char Path[MAX_PATH];
199201
Path[0] = '\0';

sycl/source/detail/pi.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,12 +364,12 @@ std::vector<std::pair<std::string, backend>> findPlugins() {
364364
// Load the Plugin by calling the OS dependent library loading call.
365365
// Return the handle to the Library.
366366
void *loadPlugin(const std::string &PluginPath) {
367-
return loadOsLibrary(PluginPath);
367+
return loadOsPluginLibrary(PluginPath);
368368
}
369369

370370
// Unload the given plugin by calling teh OS-specific library unloading call.
371371
// \param Library OS-specific library handle created when loading.
372-
int unloadPlugin(void *Library) { return unloadOsLibrary(Library); }
372+
int unloadPlugin(void *Library) { return unloadOsPluginLibrary(Library); }
373373

374374
// Binds all the PI Interface APIs to Plugin Library Function Addresses.
375375
// TODO: Remove the 'OclPtr' extension to PI_API.

sycl/source/detail/posix_pi.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace sycl {
1616
__SYCL_INLINE_VER_NAMESPACE(_V1) {
1717
namespace detail::pi {
1818

19-
void *loadOsLibrary(const std::string &PluginPath) {
19+
void *loadOsPluginLibrary(const std::string &PluginPath) {
2020
// TODO: Check if the option RTLD_NOW is correct. Explore using
2121
// RTLD_DEEPBIND option when there are multiple plugins.
2222
void *so = dlopen(PluginPath.c_str(), RTLD_NOW);
@@ -28,7 +28,7 @@ void *loadOsLibrary(const std::string &PluginPath) {
2828
return so;
2929
}
3030

31-
int unloadOsLibrary(void *Library) {
31+
int unloadOsPluginLibrary(void *Library) {
3232
// The mock plugin does not have an associated library, so we allow nullptr
3333
// here to avoid it trying to free a non-existent library.
3434
if (!Library)

0 commit comments

Comments
 (0)