Skip to content

Commit c510c88

Browse files
author
sergei
authored
Revert "[SYCL] Clear extensions functions cache upon context release (#5282)" (#5433)
This reverts commit 2eed402. Revert is due massive performance regression introduced. Employing of spin-lock and/or pre-initialization didn't help reduce performance impact.
1 parent f4b3123 commit c510c88

File tree

3 files changed

+14
-121
lines changed

3 files changed

+14
-121
lines changed

sycl/plugins/opencl/ext_functions.inc

Lines changed: 0 additions & 20 deletions
This file was deleted.

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 14 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
#include <iostream>
2626
#include <limits>
2727
#include <map>
28-
#include <memory>
29-
#include <mutex>
3028
#include <sstream>
3129
#include <string>
3230
#include <vector>
@@ -73,93 +71,19 @@ CONSTFIX char clGetDeviceFunctionPointerName[] =
7371

7472
#undef CONSTFIX
7573

76-
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
77-
cl_device_id device, cl_program program, const char *FuncName,
78-
cl_ulong *ret_ptr);
79-
80-
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
81-
cl_program program, cl_uint spec_id, size_t spec_size,
82-
const void *spec_value);
83-
84-
struct ExtFuncsPerContextT;
85-
86-
namespace detail {
87-
template <const char *FuncName, typename FuncT>
88-
std::pair<FuncT &, bool &> get(ExtFuncsPerContextT &);
89-
} // namespace detail
90-
91-
struct ExtFuncsPerContextT {
92-
#define _EXT_FUNCTION_INTEL(t_pfx) \
93-
t_pfx##INTEL_fn t_pfx##Func = nullptr; \
94-
bool t_pfx##Initialized = false;
95-
96-
#define _EXT_FUNCTION(t_pfx) \
97-
t_pfx##_fn t_pfx##Func = nullptr; \
98-
bool t_pfx##Initialized = false;
99-
100-
#include "ext_functions.inc"
101-
102-
#undef _EXT_FUNCTION
103-
#undef _EXT_FUNCTION_INTEL
104-
105-
std::mutex Mtx;
106-
107-
template <const char *FuncName, typename FuncT>
108-
std::pair<FuncT &, bool &> get() {
109-
return detail::get<FuncName, FuncT>(*this);
110-
}
111-
};
112-
113-
namespace detail {
114-
115-
#define _EXT_FUNCTION_COMMON(t_pfx, t_pfx_suff) \
116-
template <> \
117-
std::pair<t_pfx_suff##_fn &, bool &> get<t_pfx##Name, t_pfx_suff##_fn>( \
118-
ExtFuncsPerContextT & Funcs) { \
119-
using FPtrT = t_pfx_suff##_fn; \
120-
std::pair<FPtrT &, bool &> Ret{Funcs.t_pfx##Func, \
121-
Funcs.t_pfx##Initialized}; \
122-
return Ret; \
123-
}
124-
#define _EXT_FUNCTION_INTEL(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx##INTEL)
125-
#define _EXT_FUNCTION(t_pfx) _EXT_FUNCTION_COMMON(t_pfx, t_pfx)
126-
127-
#include "ext_functions.inc"
128-
129-
#undef _EXT_FUNCTION
130-
#undef _EXT_FUNCTION_INTEL
131-
#undef _EXT_FUNCTION_COMMON
132-
} // namespace detail
133-
134-
struct ExtFuncsCachesT {
135-
std::map<pi_context, ExtFuncsPerContextT> Caches;
136-
std::mutex Mtx;
137-
};
138-
139-
ExtFuncsCachesT *ExtFuncsCaches = nullptr;
140-
14174
// USM helper function to get an extension function pointer
14275
template <const char *FuncName, typename T>
14376
static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
14477
// TODO
14578
// Potentially redo caching as PI interface changes.
146-
ExtFuncsPerContextT *PerContext = nullptr;
147-
{
148-
assert(ExtFuncsCaches);
149-
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx};
150-
151-
PerContext = &ExtFuncsCaches->Caches[context];
152-
}
153-
154-
std::lock_guard<std::mutex> Lock{PerContext->Mtx};
155-
std::pair<T &, bool &> FuncInitialized = PerContext->get<FuncName, T>();
79+
thread_local static std::map<pi_context, T> FuncPtrs;
15680

15781
// if cached, return cached FuncPtr
158-
if (FuncInitialized.second) {
82+
if (auto F = FuncPtrs[context]) {
15983
// if cached that extension is not available return nullptr and
16084
// PI_INVALID_VALUE
161-
*fptr = FuncInitialized.first;
162-
return *fptr ? PI_SUCCESS : PI_INVALID_VALUE;
85+
*fptr = F;
86+
return F ? PI_SUCCESS : PI_INVALID_VALUE;
16387
}
16488

16589
cl_uint deviceCount;
@@ -191,17 +115,14 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
191115
T FuncPtr =
192116
(T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
193117

194-
// We're about to store the cached value. Mark this cache entry initialized.
195-
FuncInitialized.second = true;
196-
197118
if (!FuncPtr) {
198119
// Cache that the extension is not available
199-
FuncInitialized.first = nullptr;
120+
FuncPtrs[context] = nullptr;
200121
return PI_INVALID_VALUE;
201122
}
202123

203-
FuncInitialized.first = FuncPtr;
204124
*fptr = FuncPtr;
125+
FuncPtrs[context] = FuncPtr;
205126

206127
return cast<pi_result>(ret_err);
207128
}
@@ -641,6 +562,9 @@ static bool is_in_separated_string(const std::string &str, char delimiter,
641562
return false;
642563
}
643564

565+
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
566+
cl_device_id device, cl_program program, const char *FuncName,
567+
cl_ulong *ret_ptr);
644568
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
645569
const char *func_name,
646570
pi_uint64 *function_pointer_ret) {
@@ -1381,6 +1305,10 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name,
13811305
}
13821306
}
13831307

1308+
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
1309+
cl_program program, cl_uint spec_id, size_t spec_size,
1310+
const void *spec_value);
1311+
13841312
pi_result piextProgramSetSpecializationConstant(pi_program prog,
13851313
pi_uint32 spec_id,
13861314
size_t spec_size,
@@ -1456,21 +1384,9 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
14561384
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
14571385
pi_result piTearDown(void *PluginParameter) {
14581386
(void)PluginParameter;
1459-
delete ExtFuncsCaches;
1460-
ExtFuncsCaches = nullptr;
14611387
return PI_SUCCESS;
14621388
}
14631389

1464-
pi_result piContextRelease(pi_context Context) {
1465-
{
1466-
std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx};
1467-
1468-
ExtFuncsCaches->Caches.erase(Context);
1469-
}
1470-
1471-
return cast<pi_result>(clReleaseContext(cast<cl_context>(Context)));
1472-
}
1473-
14741390
pi_result piPluginInit(pi_plugin *PluginInit) {
14751391
int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion);
14761392
if (CompareVersions < 0) {
@@ -1482,8 +1398,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
14821398
// PI interface supports higher version or the same version.
14831399
strncpy(PluginInit->PluginVersion, SupportedVersion, 4);
14841400

1485-
ExtFuncsCaches = new ExtFuncsCachesT;
1486-
14871401
#define _PI_CL(pi_api, ocl_api) \
14881402
(PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
14891403

@@ -1507,7 +1421,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
15071421
_PI_CL(piContextCreate, piContextCreate)
15081422
_PI_CL(piContextGetInfo, clGetContextInfo)
15091423
_PI_CL(piContextRetain, clRetainContext)
1510-
_PI_CL(piContextRelease, piContextRelease)
1424+
_PI_CL(piContextRelease, clReleaseContext)
15111425
_PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle)
15121426
_PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle)
15131427
// Queue

sycl/test/abi/pi_opencl_symbol_check.dump

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
# UNSUPPORTED: libcxx
99

1010
piContextCreate
11-
piContextRelease
1211
piDeviceGetInfo
1312
piDevicesGet
1413
piEnqueueMemBufferMap

0 commit comments

Comments
 (0)