Skip to content

Commit 65afd32

Browse files
author
Sergey Kanaev
committed
Change structure of cache to reduce number of locks
Signed-off-by: Sergey Kanaev <[email protected]>
1 parent db047b8 commit 65afd32

File tree

2 files changed

+86
-101
lines changed

2 files changed

+86
-101
lines changed

sycl/plugins/opencl/ext_functions.inc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef _EXT_FUNCTION_INTEL
2+
#error Undefined _EXT_FUNCTION_INTEL macro expansion
3+
#endif
4+
5+
#ifndef _EXT_FUNCTION
6+
#error Undefined _EXT_FUNCTION macro expansion
7+
#endif
8+
9+
_EXT_FUNCTION_INTEL(clHostMemAlloc)
10+
_EXT_FUNCTION_INTEL(clDeviceMemAlloc)
11+
_EXT_FUNCTION_INTEL(clSharedMemAlloc)
12+
_EXT_FUNCTION_INTEL(clCreateBufferWithProperties)
13+
_EXT_FUNCTION_INTEL(clMemBlockingFree)
14+
_EXT_FUNCTION_INTEL(clMemFree)
15+
_EXT_FUNCTION_INTEL(clSetKernelArgMemPointer)
16+
_EXT_FUNCTION_INTEL(clEnqueueMemset)
17+
_EXT_FUNCTION_INTEL(clEnqueueMemcpy)
18+
_EXT_FUNCTION_INTEL(clGetMemAllocInfo)
19+
_EXT_FUNCTION(clGetDeviceFunctionPointer)
20+
_EXT_FUNCTION(clSetProgramSpecializationConstant)

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 66 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -81,99 +81,90 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
8181
cl_program program, cl_uint spec_id, size_t spec_size,
8282
const void *spec_value);
8383

84-
// For the time being, cache is split into multiple maps of type
85-
// `context -> function_type'.
86-
// There's another way. A mapping of context to collection of function pointers.
87-
// Though, the former design allows for simultaneous access for different
88-
// function pointer for different contexts.
89-
template <const char *FuncName, typename FuncT> struct ExtFuncCache {
90-
std::map<pi_context, FuncT> Cache;
91-
// FIXME Use spin-lock to make lock/unlock faster and w/o context switching
92-
std::mutex Mtx;
93-
};
94-
95-
struct ExtFuncCacheCollection;
84+
struct ExtFuncsPerContextT;
9685

9786
namespace detail {
98-
template <const char *FuncName, typename FuncT>
99-
ExtFuncCache<FuncName, FuncT> &get(::ExtFuncCacheCollection &);
87+
template <const char *FuncName, typename FuncT>
88+
std::pair<FuncT &, bool &> get(ExtFuncsPerContextT &);
10089
} // namespace detail
10190

102-
struct ExtFuncCacheCollection {
91+
struct ExtFuncsPerContextT {
92+
#define _EXT_FUNCTION_INTEL(t_pfx) \
93+
t_pfx##INTEL_fn t_pfx##Func = nullptr; \
94+
bool t_pfx##Initialized = false;
95+
96+
#define _EXT_FUNCTION(t_pfx) \
97+
t_pfx##_fn t_pfx##Func = nullptr; \
98+
bool t_pfx##Initialized = false;
99+
100+
#include "ext_functions.inc"
101+
102+
#undef _EXT_FUNCTION
103+
#undef _EXT_FUNCTION_INTEL
104+
105+
std::mutex Mtx;
106+
103107
template <const char *FuncName, typename FuncT>
104-
ExtFuncCache<FuncName, FuncT> &get() {
108+
std::pair<FuncT &, bool &> get() {
105109
return detail::get<FuncName, FuncT>(*this);
106110
}
107-
108-
#define DEFINE_INTEL(t_pfx) \
109-
ExtFuncCache<t_pfx##Name, t_pfx##INTEL_fn> t_pfx##_Cache
110-
#define DEFINE(t_pfx) ExtFuncCache<t_pfx##Name, t_pfx##_fn> t_pfx##_Cache
111-
112-
DEFINE_INTEL(clHostMemAlloc);
113-
DEFINE_INTEL(clDeviceMemAlloc);
114-
DEFINE_INTEL(clSharedMemAlloc);
115-
DEFINE_INTEL(clCreateBufferWithProperties);
116-
DEFINE_INTEL(clMemBlockingFree);
117-
DEFINE_INTEL(clMemFree);
118-
DEFINE_INTEL(clSetKernelArgMemPointer);
119-
DEFINE_INTEL(clEnqueueMemset);
120-
DEFINE_INTEL(clEnqueueMemcpy);
121-
DEFINE_INTEL(clGetMemAllocInfo);
122-
DEFINE(clGetDeviceFunctionPointer);
123-
DEFINE(clSetProgramSpecializationConstant);
124-
#undef DEFINE
125-
#undef DEFINE_INTEL
126111
};
127112

128113
namespace detail {
129-
#define DEFINE_GETTER_INTEL(t_pfx) \
114+
#define _EXT_FUNCTION_INTEL(t_pfx) \
130115
template <> \
131-
ExtFuncCache<t_pfx##Name, t_pfx##INTEL_fn> \
132-
&get<t_pfx##Name, t_pfx##INTEL_fn>(::ExtFuncCacheCollection & C) { \
133-
return C.t_pfx##_Cache; \
116+
std::pair<t_pfx##INTEL_fn &, bool &> get<t_pfx##Name, t_pfx##INTEL_fn>( \
117+
ExtFuncsPerContextT &Funcs) { \
118+
using FPtrT = t_pfx##INTEL_fn; \
119+
std::pair<FPtrT &, bool &> Ret{ \
120+
Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \
121+
return Ret; \
134122
}
135-
#define DEFINE_GETTER(t_pfx) \
123+
124+
#define _EXT_FUNCTION(t_pfx) \
136125
template <> \
137-
ExtFuncCache<t_pfx##Name, t_pfx##_fn> &get<t_pfx##Name, t_pfx##_fn>( \
138-
::ExtFuncCacheCollection & C) { \
139-
return C.t_pfx##_Cache; \
126+
std::pair<t_pfx##_fn &, bool &> get<t_pfx##Name, t_pfx##_fn>( \
127+
ExtFuncsPerContextT &Funcs) { \
128+
using FPtrT = t_pfx##_fn; \
129+
std::pair<FPtrT &, bool &> Ret{ \
130+
Funcs.t_pfx##Func, Funcs.t_pfx##Initialized}; \
131+
return Ret; \
140132
}
141133

142-
DEFINE_GETTER_INTEL(clHostMemAlloc)
143-
DEFINE_GETTER_INTEL(clDeviceMemAlloc)
144-
DEFINE_GETTER_INTEL(clSharedMemAlloc)
145-
DEFINE_GETTER_INTEL(clCreateBufferWithProperties)
146-
DEFINE_GETTER_INTEL(clMemBlockingFree)
147-
DEFINE_GETTER_INTEL(clMemFree)
148-
DEFINE_GETTER_INTEL(clSetKernelArgMemPointer)
149-
DEFINE_GETTER_INTEL(clEnqueueMemset)
150-
DEFINE_GETTER_INTEL(clEnqueueMemcpy)
151-
DEFINE_GETTER_INTEL(clGetMemAllocInfo)
152-
DEFINE_GETTER(clGetDeviceFunctionPointer)
153-
DEFINE_GETTER(clSetProgramSpecializationConstant)
154-
#undef DEFINE_GETTER
155-
#undef DEFINE_GETTER_INTEL
134+
#include "ext_functions.inc"
135+
136+
#undef _EXT_FUNCTION
137+
#undef _EXT_FUNCTION_INTEL
156138
} // namespace detail
157139

158-
ExtFuncCacheCollection *ExtFuncCaches = nullptr;
140+
struct ExtFuncsCachesT {
141+
std::map<pi_context, ExtFuncsPerContextT> Caches;
142+
std::mutex Mtx;
143+
};
144+
145+
ExtFuncsCachesT *ExtFuncsCaches = nullptr;
159146

160147
// USM helper function to get an extension function pointer
161148
template <const char *FuncName, typename T>
162149
static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
163150
// TODO
164151
// Potentially redo caching as PI interface changes.
165-
ExtFuncCache<FuncName, T> &Cache = ExtFuncCaches->get<FuncName, T>();
152+
ExtFuncsPerContextT *PerContext = nullptr;
153+
{
154+
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx};
166155

167-
std::lock_guard<std::mutex> CacheLock{Cache.Mtx};
156+
PerContext = &ExtFuncsCaches->Caches[context];
157+
}
168158

169-
auto It = Cache.Cache.find(context);
159+
std::lock_guard<std::mutex> Lock{PerContext->Mtx};
160+
std::pair<T &, bool &> FuncInitialized = PerContext->get<FuncName, T>();
170161

171162
// if cached, return cached FuncPtr
172-
if (It != Cache.Cache.end()) {
163+
if (FuncInitialized.second) {
173164
// if cached that extension is not available return nullptr and
174165
// PI_INVALID_VALUE
175-
*fptr = It->second;
176-
return It->second ? PI_SUCCESS : PI_INVALID_VALUE;
166+
*fptr = FuncInitialized.first;
167+
return *fptr ? PI_SUCCESS : PI_INVALID_VALUE;
177168
}
178169

179170
cl_uint deviceCount;
@@ -207,12 +198,14 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
207198

208199
if (!FuncPtr) {
209200
// Cache that the extension is not available
210-
Cache.Cache[context] = nullptr;
201+
FuncInitialized.first = nullptr;
202+
FuncInitialized.second = true;
211203
return PI_INVALID_VALUE;
212204
}
213205

206+
FuncInitialized.first = FuncPtr;
207+
FuncInitialized.second = true;
214208
*fptr = FuncPtr;
215-
Cache.Cache[context] = FuncPtr;
216209

217210
return cast<pi_result>(ret_err);
218211
}
@@ -1466,45 +1459,17 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
14661459
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
14671460
pi_result piTearDown(void *PluginParameter) {
14681461
(void)PluginParameter;
1469-
delete ExtFuncCaches;
1470-
ExtFuncCaches = nullptr;
1462+
delete ExtFuncsCaches;
1463+
ExtFuncsCaches = nullptr;
14711464
return PI_SUCCESS;
14721465
}
14731466

14741467
pi_result piContextRelease(pi_context Context) {
1475-
#define RELEASE_EXT_FUNCS_CACHE_INTEL(t_pfx) \
1476-
{ \
1477-
ExtFuncCache<t_pfx##Name, t_pfx##INTEL_fn> &Cache = \
1478-
ExtFuncCaches->get<t_pfx##Name, t_pfx##INTEL_fn>(); \
1479-
std::lock_guard<std::mutex> CacheLock{Cache.Mtx}; \
1480-
auto It = Cache.Cache.find(Context); \
1481-
if (It != Cache.Cache.end()) \
1482-
Cache.Cache.erase(It); \
1483-
}
1484-
#define RELEASE_EXT_FUNCS_CACHE(t_pfx) \
1485-
{ \
1486-
ExtFuncCache<t_pfx##Name, t_pfx##_fn> &Cache = \
1487-
ExtFuncCaches->get<t_pfx##Name, t_pfx##_fn>(); \
1488-
std::lock_guard<std::mutex> CacheLock{Cache.Mtx}; \
1489-
auto It = Cache.Cache.find(Context); \
1490-
if (It != Cache.Cache.end()) \
1491-
Cache.Cache.erase(It); \
1492-
}
1468+
{
1469+
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx};
14931470

1494-
RELEASE_EXT_FUNCS_CACHE_INTEL(clHostMemAlloc);
1495-
RELEASE_EXT_FUNCS_CACHE_INTEL(clDeviceMemAlloc);
1496-
RELEASE_EXT_FUNCS_CACHE_INTEL(clSharedMemAlloc);
1497-
RELEASE_EXT_FUNCS_CACHE_INTEL(clCreateBufferWithProperties);
1498-
RELEASE_EXT_FUNCS_CACHE_INTEL(clMemBlockingFree);
1499-
RELEASE_EXT_FUNCS_CACHE_INTEL(clMemFree);
1500-
RELEASE_EXT_FUNCS_CACHE_INTEL(clSetKernelArgMemPointer);
1501-
RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemset);
1502-
RELEASE_EXT_FUNCS_CACHE_INTEL(clEnqueueMemcpy);
1503-
RELEASE_EXT_FUNCS_CACHE_INTEL(clGetMemAllocInfo);
1504-
RELEASE_EXT_FUNCS_CACHE(clGetDeviceFunctionPointer);
1505-
RELEASE_EXT_FUNCS_CACHE(clSetProgramSpecializationConstant);
1506-
#undef RELEASE_EXT_FUNCS_CACHE
1507-
#undef RELEASE_EXT_FUNCS_CACHE_INTEL
1471+
ExtFuncsCaches->Caches.erase(Context);
1472+
}
15081473

15091474
return cast<pi_result>(clReleaseContext(cast<cl_context>(Context)));
15101475
}
@@ -1520,7 +1485,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
15201485
// PI interface supports higher version or the same version.
15211486
strncpy(PluginInit->PluginVersion, SupportedVersion, 4);
15221487

1523-
ExtFuncCaches = new ExtFuncCacheCollection;
1488+
ExtFuncsCaches = new ExtFuncsCachesT;
15241489

15251490
#define _PI_CL(pi_api, ocl_api) \
15261491
(PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);

0 commit comments

Comments
 (0)