Skip to content

Commit eeff988

Browse files
author
Sergey Kanaev
committed
[SYCL] Clear extensions functions cache upon context release
This eliminates reuse of invalid cached values after a context has been released. Signed-off-by: Sergey Kanaev <[email protected]>
1 parent 1e55cf3 commit eeff988

File tree

1 file changed

+136
-14
lines changed

1 file changed

+136
-14
lines changed

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 136 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include <iostream>
2626
#include <limits>
2727
#include <map>
28+
#include <memory>
29+
#include <mutex>
2830
#include <sstream>
2931
#include <string>
3032
#include <vector>
@@ -71,19 +73,105 @@ CONSTFIX char clGetDeviceFunctionPointerName[] =
7173

7274
#undef CONSTFIX
7375

76+
// Signature of the extension entry point used to query a device function
// pointer by name. Declared ahead of the extension-function caches so that the
// cache types below can name it.
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
77+
cl_device_id device, cl_program program, const char *FuncName,
78+
cl_ulong *ret_ptr);
79+
80+
// Signature of the extension entry point used to set a program specialization
// constant. Declared here for the same reason as the typedef above.
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
81+
cl_program program, cl_uint spec_id, size_t spec_size,
82+
const void *spec_value);
83+
84+
// For the time being, the cache is split into multiple maps of type
85+
// `context -> function_type', one map per extension function.
86+
// An alternative would be a single mapping of context to a collection of
// function pointers.
87+
// The split design, however, gives every map its own mutex, so lookups of
88+
// different function pointers do not contend on a single lock.
89+
//
// FuncName and FuncT are not used inside the struct body other than FuncT as
// the mapped type; together they make each instantiation a distinct type.
template <const char *FuncName, typename FuncT>
90+
struct ExtFuncCache {
91+
// Cached function pointer per context. getExtFuncFromContext stores nullptr
// here to remember that the extension is unavailable for a context.
std::map<pi_context, FuncT> Cache;
92+
// FIXME Use spin-lock to make lock/unlock faster and w/o context switching
93+
// Guards Cache; the users in this file hold it through std::lock_guard.
std::mutex Mtx;
94+
};
95+
96+
// Forward declaration: detail::get below takes the collection by reference
// before the struct itself is defined.
struct ExtFuncCacheCollection;
97+
98+
namespace detail {
99+
// Primary template, declared only. An explicit specialization is provided
// further down for every <FuncName, FuncT> pair that has a cache member; it
// returns that member of the collection passed in.
template <const char *FuncName, typename FuncT>
100+
ExtFuncCache<FuncName, FuncT> &get(::ExtFuncCacheCollection &);
101+
} // namespace detail
102+
103+
// Aggregates one ExtFuncCache per supported extension function. Each member is
// named <function>_Cache and is reached through get<FuncName, FuncT>().
struct ExtFuncCacheCollection {
104+
// Returns the cache member matching <FuncName, FuncT> by forwarding to the
// detail::get specialization for that pair.
template <const char *FuncName, typename FuncT>
105+
ExtFuncCache<FuncName, FuncT> &get() {
106+
return detail::get<FuncName, FuncT>(*this);
107+
}
108+
109+
// DEFINE_INTEL(p) declares member p_Cache of type ExtFuncCache<pName, pINTEL_fn>.
#define DEFINE_INTEL(t_pfx) \
110+
ExtFuncCache<t_pfx ## Name, t_pfx ## INTEL_fn> t_pfx ## _Cache
111+
// DEFINE(p) declares member p_Cache of type ExtFuncCache<pName, p_fn>.
#define DEFINE(t_pfx) \
112+
ExtFuncCache<t_pfx ## Name, t_pfx ## _fn> t_pfx ## _Cache
113+
114+
// One cache member per extension function. The same list is repeated for the
// detail::get specializations and in piContextRelease; keep all three in sync.
DEFINE_INTEL(clHostMemAlloc);
115+
DEFINE_INTEL(clDeviceMemAlloc);
116+
DEFINE_INTEL(clSharedMemAlloc);
117+
DEFINE_INTEL(clCreateBufferWithProperties);
118+
DEFINE_INTEL(clMemBlockingFree);
119+
DEFINE_INTEL(clMemFree);
120+
DEFINE_INTEL(clSetKernelArgMemPointer);
121+
DEFINE_INTEL(clEnqueueMemset);
122+
DEFINE_INTEL(clEnqueueMemcpy);
123+
DEFINE_INTEL(clGetMemAllocInfo);
124+
DEFINE(clGetDeviceFunctionPointer);
125+
DEFINE(clSetProgramSpecializationConstant);
126+
#undef DEFINE
127+
#undef DEFINE_INTEL
128+
};
129+
130+
// Explicit specializations of detail::get, one per cache member of
// ExtFuncCacheCollection. Each returns a reference to the matching
// <function>_Cache member of the collection it is given.
namespace detail {
131+
// DEFINE_GETTER_INTEL(p) specializes get<pName, pINTEL_fn> to return C.p_Cache.
#define DEFINE_GETTER_INTEL(t_pfx) \
132+
template<> ExtFuncCache<t_pfx ## Name, t_pfx ## INTEL_fn> &get<t_pfx ## Name, t_pfx ## INTEL_fn>(::ExtFuncCacheCollection &C) { \
133+
return C.t_pfx ## _Cache; \
134+
}
135+
// DEFINE_GETTER(p) specializes get<pName, p_fn> to return C.p_Cache.
#define DEFINE_GETTER(t_pfx) \
136+
template<> ExtFuncCache<t_pfx ## Name, t_pfx ## _fn> &get<t_pfx ## Name, t_pfx ## _fn>(::ExtFuncCacheCollection &C) { \
137+
return C.t_pfx ## _Cache; \
138+
}
139+
140+
// This list mirrors the member list of ExtFuncCacheCollection.
DEFINE_GETTER_INTEL(clHostMemAlloc)
141+
DEFINE_GETTER_INTEL(clDeviceMemAlloc)
142+
DEFINE_GETTER_INTEL(clSharedMemAlloc)
143+
DEFINE_GETTER_INTEL(clCreateBufferWithProperties)
144+
DEFINE_GETTER_INTEL(clMemBlockingFree)
145+
DEFINE_GETTER_INTEL(clMemFree)
146+
DEFINE_GETTER_INTEL(clSetKernelArgMemPointer)
147+
DEFINE_GETTER_INTEL(clEnqueueMemset)
148+
DEFINE_GETTER_INTEL(clEnqueueMemcpy)
149+
DEFINE_GETTER_INTEL(clGetMemAllocInfo)
150+
DEFINE_GETTER(clGetDeviceFunctionPointer)
151+
DEFINE_GETTER(clSetProgramSpecializationConstant)
152+
#undef DEFINE_GETTER
153+
#undef DEFINE_GETTER_INTEL
154+
} // namespace detail
155+
156+
// Plugin-wide collection of extension-function caches. Allocated in
// piPluginInit and deleted (then reset to nullptr) in piTearDown, so it is
// null outside that window.
ExtFuncCacheCollection *ExtFuncCaches = nullptr;
157+
74158
// USM helper function to get an extension function pointer
75159
template <const char *FuncName, typename T>
76160
static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
77161
// TODO
78162
// Potentially redo caching as PI interface changes.
79-
thread_local static std::map<pi_context, T> FuncPtrs;
163+
ExtFuncCache<FuncName, T> &Cache = ExtFuncCaches->get<FuncName, T>();
164+
165+
std::lock_guard<std::mutex> CacheLock{Cache.Mtx};
166+
167+
auto It = Cache.Cache.find(context);
80168

81169
// if cached, return cached FuncPtr
82-
if (auto F = FuncPtrs[context]) {
170+
if (It != Cache.Cache.end()) {
83171
// if cached that extension is not available return nullptr and
84172
// PI_INVALID_VALUE
85-
*fptr = F;
86-
return F ? PI_SUCCESS : PI_INVALID_VALUE;
173+
*fptr = It->second;
174+
return It->second ? PI_SUCCESS : PI_INVALID_VALUE;
87175
}
88176

89177
cl_uint deviceCount;
@@ -117,12 +205,12 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
117205

118206
if (!FuncPtr) {
119207
// Cache that the extension is not available
120-
FuncPtrs[context] = nullptr;
208+
Cache.Cache[context] = nullptr;
121209
return PI_INVALID_VALUE;
122210
}
123211

124212
*fptr = FuncPtr;
125-
FuncPtrs[context] = FuncPtr;
213+
Cache.Cache[context] = FuncPtr;
126214

127215
return cast<pi_result>(ret_err);
128216
}
@@ -561,9 +649,6 @@ static bool is_in_separated_string(const std::string &str, char delimiter,
561649
return false;
562650
}
563651

564-
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
565-
cl_device_id device, cl_program program, const char *FuncName,
566-
cl_ulong *ret_ptr);
567652
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
568653
const char *func_name,
569654
pi_uint64 *function_pointer_ret) {
@@ -1304,10 +1389,6 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name,
13041389
}
13051390
}
13061391

1307-
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
1308-
cl_program program, cl_uint spec_id, size_t spec_size,
1309-
const void *spec_value);
1310-
13111392
pi_result piextProgramSetSpecializationConstant(pi_program prog,
13121393
pi_uint32 spec_id,
13131394
size_t spec_size,
@@ -1383,9 +1464,48 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
13831464
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
13841465
// Tears the plugin down: destroys the extension-function cache collection
// created by piPluginInit. PluginParameter is unused. Always returns
// PI_SUCCESS.
pi_result piTearDown(void *PluginParameter) {
13851466
(void)PluginParameter;
1467+
// Free every cached extension-function pointer in one go.
delete ExtFuncCaches;
1468+
// Reset the pointer so that it no longer refers to the freed collection.
ExtFuncCaches = nullptr;
13861469
return PI_SUCCESS;
13871470
}
13881471

1472+
// Releases one reference to Context, first purging every extension-function
// cache entry keyed by it so that no cached pointer is reused for a context
// that has been released.
//
// Entries are dropped on every call, without looking at the context's
// reference count. If the context is still alive afterwards,
// getExtFuncFromContext simply queries the pointer again and re-caches it on
// the next lookup.
//
// Returns the result of clReleaseContext converted to pi_result.
pi_result piContextRelease(pi_context Context) {
// Erases Context's entry (if any) from the cache selected by
// <t_pfx##Name, t_fn>, holding that cache's mutex for the duration.
// std::map::erase(key) is a no-op when the key is absent, so no prior find()
// is needed.
#define RELEASE_EXT_FUNCS_CACHE_AS(t_pfx, t_fn)                                \
  {                                                                            \
    ExtFuncCache<t_pfx##Name, t_fn> &Cache =                                   \
        ExtFuncCaches->get<t_pfx##Name, t_fn>();                               \
    std::lock_guard<std::mutex> CacheLock{Cache.Mtx};                          \
    Cache.Cache.erase(Context);                                                \
  }
// The two public spellings differ only in the suffix of the function typedef.
#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx)                                   \
  RELEASE_EXT_FUNCS_CACHE_AS(t_pfx, t_pfx##INTEL_fn)
#define RELEASE_EXT_FUNCS_CACHE(t_pfx)                                         \
  RELEASE_EXT_FUNCS_CACHE_AS(t_pfx, t_pfx##_fn)

  // piTearDown deletes the collection and resets the pointer to nullptr. A
  // release arriving after that (or before piPluginInit) has nothing to purge
  // and must not dereference the null pointer.
  if (ExtFuncCaches) {
    // Keep this list in sync with the members of ExtFuncCacheCollection.
    RELEASE_EXT_FUNCS_CACHE_INTEL(clHostMemAlloc);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clDeviceMemAlloc);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clSharedMemAlloc);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clCreateBufferWithProperties);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clMemBlockingFree);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clMemFree);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clSetKernelArgMemPointer);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemset);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemcpy);
    RELEASE_EXT_FUNCS_CACHE_INTEL(clGetMemAllocInfo);
    RELEASE_EXT_FUNCS_CACHE(clGetDeviceFunctionPointer);
    RELEASE_EXT_FUNCS_CACHE(clSetProgramSpecializationConstant);
  }

#undef RELEASE_EXT_FUNCS_CACHE
#undef RELEASE_EXT_FUNCS_CACHE_INTEL
#undef RELEASE_EXT_FUNCS_CACHE_AS

  // The cache is purged before the underlying OpenCL release on purpose, so
  // no entry for Context remains once the release has happened.
  return cast<pi_result>(clReleaseContext(cast<cl_context>(Context)));
}
1508+
13891509
pi_result piPluginInit(pi_plugin *PluginInit) {
13901510
int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion);
13911511
if (CompareVersions < 0) {
@@ -1397,6 +1517,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
13971517
// PI interface supports higher version or the same version.
13981518
strncpy(PluginInit->PluginVersion, SupportedVersion, 4);
13991519

1520+
ExtFuncCaches = new ExtFuncCacheCollection;
1521+
14001522
#define _PI_CL(pi_api, ocl_api) \
14011523
(PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
14021524

@@ -1420,7 +1542,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
14201542
_PI_CL(piContextCreate, piContextCreate)
14211543
_PI_CL(piContextGetInfo, clGetContextInfo)
14221544
_PI_CL(piContextRetain, clRetainContext)
1423-
_PI_CL(piContextRelease, clReleaseContext)
1545+
_PI_CL(piContextRelease, piContextRelease)
14241546
_PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle)
14251547
_PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle)
14261548
// Queue

0 commit comments

Comments
 (0)