-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL] Clear extensions functions cache upon context release #5282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
eeff988
6878a50
db047b8
65afd32
d212fe7
4320075
a020c33
52a6ad5
84502a7
2a1b3ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#ifndef _EXT_FUNCTION_INTEL | ||
#error Undefined _EXT_FUNCTION_INTEL macro expansion | ||
#endif | ||
|
||
#ifndef _EXT_FUNCTION | ||
#error Undefined _EXT_FUNCTION macro expansion | ||
#endif | ||
|
||
_EXT_FUNCTION_INTEL(clHostMemAlloc) | ||
_EXT_FUNCTION_INTEL(clDeviceMemAlloc) | ||
_EXT_FUNCTION_INTEL(clSharedMemAlloc) | ||
_EXT_FUNCTION_INTEL(clCreateBufferWithProperties) | ||
_EXT_FUNCTION_INTEL(clMemBlockingFree) | ||
_EXT_FUNCTION_INTEL(clMemFree) | ||
_EXT_FUNCTION_INTEL(clSetKernelArgMemPointer) | ||
_EXT_FUNCTION_INTEL(clEnqueueMemset) | ||
_EXT_FUNCTION_INTEL(clEnqueueMemcpy) | ||
_EXT_FUNCTION_INTEL(clGetMemAllocInfo) | ||
_EXT_FUNCTION(clGetDeviceFunctionPointer) | ||
_EXT_FUNCTION(clSetProgramSpecializationConstant) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,8 @@ | |
#include <iostream> | ||
#include <limits> | ||
#include <map> | ||
#include <memory> | ||
#include <mutex> | ||
#include <sstream> | ||
#include <string> | ||
#include <vector> | ||
|
@@ -71,19 +73,93 @@ CONSTFIX char clGetDeviceFunctionPointerName[] = | |
|
||
#undef CONSTFIX | ||
|
||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( | ||
cl_device_id device, cl_program program, const char *FuncName, | ||
cl_ulong *ret_ptr); | ||
|
||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( | ||
cl_program program, cl_uint spec_id, size_t spec_size, | ||
const void *spec_value); | ||
|
||
struct ExtFuncsPerContextT; | ||
|
||
namespace detail { | ||
template <const char *FuncName, typename FuncT> | ||
std::pair<FuncT &, bool &> get(ExtFuncsPerContextT &); | ||
} // namespace detail | ||
|
||
struct ExtFuncsPerContextT { | ||
#define _EXT_FUNCTION_INTEL(t_pfx) \ | ||
t_pfx##INTEL_fn t_pfx##Func = nullptr; \ | ||
bool t_pfx##Initialized = false; | ||
|
||
#define _EXT_FUNCTION(t_pfx) \ | ||
t_pfx##_fn t_pfx##Func = nullptr; \ | ||
bool t_pfx##Initialized = false; | ||
|
||
#include "ext_functions.inc" | ||
|
||
#undef _EXT_FUNCTION | ||
#undef _EXT_FUNCTION_INTEL | ||
|
||
std::mutex Mtx; | ||
|
||
template <const char *FuncName, typename FuncT> | ||
std::pair<FuncT &, bool &> get() { | ||
return detail::get<FuncName, FuncT>(*this); | ||
} | ||
}; | ||
|
||
namespace detail { | ||
|
||
#define _EXT_FUNCTION_COMMON(t_pfx, t_pfx_suff) \ | ||
template <> \ | ||
std::pair<t_pfx_suff##_fn &, bool &> get<t_pfx##Name, t_pfx_suff##_fn>( \ | ||
ExtFuncsPerContextT & Funcs) { \ | ||
using FPtrT = t_pfx_suff##_fn; \ | ||
std::pair<FPtrT &, bool &> Ret{Funcs.t_pfx##Func, \ | ||
Funcs.t_pfx##Initialized}; \ | ||
return Ret; \ | ||
} | ||
#define _EXT_FUNCTION_INTEL(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx##INTEL) | ||
#define _EXT_FUNCTION(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx) | ||
|
||
#include "ext_functions.inc" | ||
|
||
#undef _EXT_FUNCTION | ||
#undef _EXT_FUNCTION_INTEL | ||
#undef _EXT_FUNCTION_COMMON | ||
} // namespace detail | ||
|
||
struct ExtFuncsCachesT { | ||
std::map<pi_context, ExtFuncsPerContextT> Caches; | ||
std::mutex Mtx; | ||
}; | ||
|
||
ExtFuncsCachesT *ExtFuncsCaches = nullptr; | ||
|
||
// USM helper function to get an extension function pointer | ||
template <const char *FuncName, typename T> | ||
static pi_result getExtFuncFromContext(pi_context context, T *fptr) { | ||
// TODO | ||
// Potentially redo caching as PI interface changes. | ||
bader marked this conversation as resolved.
Show resolved
Hide resolved
|
||
thread_local static std::map<pi_context, T> FuncPtrs; | ||
ExtFuncsPerContextT *PerContext = nullptr; | ||
{ | ||
assert(ExtFuncsCaches); | ||
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx}; | ||
|
||
s-kanaev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
PerContext = &ExtFuncsCaches->Caches[context]; | ||
} | ||
|
||
std::lock_guard<std::mutex> Lock{PerContext->Mtx}; | ||
std::pair<T &, bool &> FuncInitialized = PerContext->get<FuncName, T>(); | ||
|
||
// if cached, return cached FuncPtr | ||
if (auto F = FuncPtrs[context]) { | ||
if (FuncInitialized.second) { | ||
// if cached that extension is not available return nullptr and | ||
// PI_INVALID_VALUE | ||
*fptr = F; | ||
return F ? PI_SUCCESS : PI_INVALID_VALUE; | ||
*fptr = FuncInitialized.first; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure we get a lot from having lazy initialization for these function pointers. Suggest moving all the initialization to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Per what was discussed with @steffenlarsen , the main drawback of constant (as opposed to lazy) init in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm OK with leaving as is. But still think that if an application creates lots of context this initialization will be that last thing we should worry about. Also this would make "common" case faster and code more simpler because less mutexes and checks should be needed. |
||
return *fptr ? PI_SUCCESS : PI_INVALID_VALUE; | ||
} | ||
|
||
cl_uint deviceCount; | ||
|
@@ -115,14 +191,17 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) { | |
T FuncPtr = | ||
(T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); | ||
|
||
// We're about to store the cached value. Mark this cache entry initialized. | ||
FuncInitialized.second = true; | ||
|
||
if (!FuncPtr) { | ||
// Cache that the extension is not available | ||
FuncPtrs[context] = nullptr; | ||
FuncInitialized.first = nullptr; | ||
return PI_INVALID_VALUE; | ||
} | ||
|
||
FuncInitialized.first = FuncPtr; | ||
*fptr = FuncPtr; | ||
FuncPtrs[context] = FuncPtr; | ||
|
||
return cast<pi_result>(ret_err); | ||
} | ||
|
@@ -561,9 +640,6 @@ static bool is_in_separated_string(const std::string &str, char delimiter, | |
return false; | ||
} | ||
|
||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)( | ||
cl_device_id device, cl_program program, const char *FuncName, | ||
cl_ulong *ret_ptr); | ||
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, | ||
const char *func_name, | ||
pi_uint64 *function_pointer_ret) { | ||
|
@@ -1304,10 +1380,6 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, | |
} | ||
} | ||
|
||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)( | ||
cl_program program, cl_uint spec_id, size_t spec_size, | ||
const void *spec_value); | ||
|
||
pi_result piextProgramSetSpecializationConstant(pi_program prog, | ||
pi_uint32 spec_id, | ||
size_t spec_size, | ||
|
@@ -1383,9 +1455,21 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, | |
// pi_level_zero.cpp for reference) Currently this is just a NOOP. | ||
pi_result piTearDown(void *PluginParameter) { | ||
(void)PluginParameter; | ||
delete ExtFuncsCaches; | ||
ExtFuncsCaches = nullptr; | ||
return PI_SUCCESS; | ||
} | ||
|
||
pi_result piContextRelease(pi_context Context) { | ||
{ | ||
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx}; | ||
|
||
ExtFuncsCaches->Caches.erase(Context); | ||
} | ||
|
||
return cast<pi_result>(clReleaseContext(cast<cl_context>(Context))); | ||
} | ||
|
||
pi_result piPluginInit(pi_plugin *PluginInit) { | ||
int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); | ||
if (CompareVersions < 0) { | ||
|
@@ -1397,6 +1481,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { | |
// PI interface supports higher version or the same version. | ||
strncpy(PluginInit->PluginVersion, SupportedVersion, 4); | ||
|
||
ExtFuncsCaches = new ExtFuncsCachesT; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use a smart pointer? https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#r3-a-raw-pointer-a-t-is-non-owning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be an option if the following drawback is resolved. Smart pointer (both Probably, @alexbatashev has smth to comment here if I'm wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Could you clarify the scenario when "mem leak or anything else related to order of destructors calls" happen and how using a raw pointer solves the problems in such scenarios? From POV, you can always call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The mem-leak and destructor ordering I refer here are not related to the owned object, i.e. cache, but to the smart pointer itself. Sure thing. When smart pointer is used I'll put call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we change the type of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is that possible? Isn't calling functions from unloaded sharing library is UB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
A global object can be destroyed earlier then a library is unloaded. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, so I expected that plug-in has a single global object managing the lifetime of plug-in data ("plug-in context") and after this global object is destroyed, there can't be any "calls to an API which uses this var". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
How this can be guaranteed? |
||
|
||
#define _PI_CL(pi_api, ocl_api) \ | ||
(PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); | ||
|
||
|
@@ -1420,7 +1506,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { | |
_PI_CL(piContextCreate, piContextCreate) | ||
_PI_CL(piContextGetInfo, clGetContextInfo) | ||
_PI_CL(piContextRetain, clRetainContext) | ||
_PI_CL(piContextRelease, clReleaseContext) | ||
_PI_CL(piContextRelease, piContextRelease) | ||
_PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle) | ||
_PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) | ||
// Queue | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why plural form?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's plural because each context has it's own cache.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be reasonable to call the whole map a cache instead of ExtFuncsPerContextT instances.