25
25
#include < iostream>
26
26
#include < limits>
27
27
#include < map>
28
+ #include < memory>
29
+ #include < mutex>
28
30
#include < sstream>
29
31
#include < string>
30
32
#include < vector>
@@ -71,19 +73,105 @@ CONSTFIX char clGetDeviceFunctionPointerName[] =
71
73
72
74
#undef CONSTFIX
73
75
76
+ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceFunctionPointer_fn)(
77
+ cl_device_id device, cl_program program, const char *FuncName,
78
+ cl_ulong *ret_ptr);
79
+
80
+ typedef CL_API_ENTRY cl_int (CL_API_CALL *clSetProgramSpecializationConstant_fn)(
81
+ cl_program program, cl_uint spec_id, size_t spec_size,
82
+ const void *spec_value);
83
+
84
+ // For the time being, cache is split into multiple maps of type
85
+ // `context -> function_type'.
86
+ // There's another way. A mapping of context to collection of function pointers.
87
+ // Though, the former design allows for simultaneous access for different
88
+ // function pointer for different contexts.
89
+ template <const char *FuncName, typename FuncT>
90
+ struct ExtFuncCache {
91
+ std::map<pi_context, FuncT> Cache;
92
+ // FIXME Use spin-lock to make lock/unlock faster and w/o context switching
93
+ std::mutex Mtx;
94
+ };
95
+
96
+ struct ExtFuncCacheCollection ;
97
+
98
+ namespace detail {
99
+ template <const char *FuncName, typename FuncT>
100
+ ExtFuncCache<FuncName, FuncT> &get (::ExtFuncCacheCollection &);
101
+ } // namespace detail
102
+
103
+ struct ExtFuncCacheCollection {
104
+ template <const char *FuncName, typename FuncT>
105
+ ExtFuncCache<FuncName, FuncT> &get () {
106
+ return detail::get<FuncName, FuncT>(*this );
107
+ }
108
+
109
+ #define DEFINE_INTEL (t_pfx ) \
110
+ ExtFuncCache<t_pfx ## Name, t_pfx ## INTEL_fn> t_pfx ## _Cache
111
+ #define DEFINE (t_pfx ) \
112
+ ExtFuncCache<t_pfx ## Name, t_pfx ## _fn> t_pfx ## _Cache
113
+
114
+ DEFINE_INTEL (clHostMemAlloc);
115
+ DEFINE_INTEL (clDeviceMemAlloc);
116
+ DEFINE_INTEL (clSharedMemAlloc);
117
+ DEFINE_INTEL (clCreateBufferWithProperties);
118
+ DEFINE_INTEL (clMemBlockingFree);
119
+ DEFINE_INTEL (clMemFree);
120
+ DEFINE_INTEL (clSetKernelArgMemPointer);
121
+ DEFINE_INTEL (clEnqueueMemset);
122
+ DEFINE_INTEL (clEnqueueMemcpy);
123
+ DEFINE_INTEL (clGetMemAllocInfo);
124
+ DEFINE (clGetDeviceFunctionPointer);
125
+ DEFINE (clSetProgramSpecializationConstant);
126
+ #undef DEFINE
127
+ #undef DEFINE_INTEL
128
+ };
129
+
130
+ namespace detail {
131
+ #define DEFINE_GETTER_INTEL (t_pfx ) \
132
+ template <> ExtFuncCache<t_pfx ## Name, t_pfx ## INTEL_fn> &get<t_pfx ## Name, t_pfx ## INTEL_fn>(::ExtFuncCacheCollection &C) { \
133
+ return C.t_pfx ## _Cache; \
134
+ }
135
+ #define DEFINE_GETTER (t_pfx ) \
136
+ template <> ExtFuncCache<t_pfx ## Name, t_pfx ## _fn> &get<t_pfx ## Name, t_pfx ## _fn>(::ExtFuncCacheCollection &C) { \
137
+ return C.t_pfx ## _Cache; \
138
+ }
139
+
140
+ DEFINE_GETTER_INTEL (clHostMemAlloc)
141
+ DEFINE_GETTER_INTEL (clDeviceMemAlloc)
142
+ DEFINE_GETTER_INTEL (clSharedMemAlloc)
143
+ DEFINE_GETTER_INTEL (clCreateBufferWithProperties)
144
+ DEFINE_GETTER_INTEL (clMemBlockingFree)
145
+ DEFINE_GETTER_INTEL (clMemFree)
146
+ DEFINE_GETTER_INTEL (clSetKernelArgMemPointer)
147
+ DEFINE_GETTER_INTEL (clEnqueueMemset)
148
+ DEFINE_GETTER_INTEL (clEnqueueMemcpy)
149
+ DEFINE_GETTER_INTEL (clGetMemAllocInfo)
150
+ DEFINE_GETTER (clGetDeviceFunctionPointer)
151
+ DEFINE_GETTER (clSetProgramSpecializationConstant)
152
+ #undef DEFINE_GETTER
153
+ #undef DEFINE_GETTER_INTEL
154
+ } // namespace detail
155
+
156
+ ExtFuncCacheCollection *ExtFuncCaches = nullptr ;
157
+
74
158
// USM helper function to get an extension function pointer
75
159
template <const char *FuncName, typename T>
76
160
static pi_result getExtFuncFromContext (pi_context context, T *fptr) {
77
161
// TODO
78
162
// Potentially redo caching as PI interface changes.
79
- thread_local static std::map<pi_context, T> FuncPtrs;
163
+ ExtFuncCache<FuncName, T> &Cache = ExtFuncCaches->get <FuncName, T>();
164
+
165
+ std::lock_guard<std::mutex> CacheLock{Cache.Mtx };
166
+
167
+ auto It = Cache.Cache .find (context);
80
168
81
169
// if cached, return cached FuncPtr
82
- if (auto F = FuncPtrs[context] ) {
170
+ if (It != Cache. Cache . end () ) {
83
171
// if cached that extension is not available return nullptr and
84
172
// PI_INVALID_VALUE
85
- *fptr = F ;
86
- return F ? PI_SUCCESS : PI_INVALID_VALUE;
173
+ *fptr = It-> second ;
174
+ return It-> second ? PI_SUCCESS : PI_INVALID_VALUE;
87
175
}
88
176
89
177
cl_uint deviceCount;
@@ -117,12 +205,12 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
117
205
118
206
if (!FuncPtr) {
119
207
// Cache that the extension is not available
120
- FuncPtrs [context] = nullptr ;
208
+ Cache. Cache [context] = nullptr ;
121
209
return PI_INVALID_VALUE;
122
210
}
123
211
124
212
*fptr = FuncPtr;
125
- FuncPtrs [context] = FuncPtr;
213
+ Cache. Cache [context] = FuncPtr;
126
214
127
215
return cast<pi_result>(ret_err);
128
216
}
@@ -561,9 +649,6 @@ static bool is_in_separated_string(const std::string &str, char delimiter,
561
649
return false ;
562
650
}
563
651
564
- typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceFunctionPointer_fn)(
565
- cl_device_id device, cl_program program, const char *FuncName,
566
- cl_ulong *ret_ptr);
567
652
pi_result piextGetDeviceFunctionPointer (pi_device device, pi_program program,
568
653
const char *func_name,
569
654
pi_uint64 *function_pointer_ret) {
@@ -1304,10 +1389,6 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name,
1304
1389
}
1305
1390
}
1306
1391
1307
- typedef CL_API_ENTRY cl_int (CL_API_CALL *clSetProgramSpecializationConstant_fn)(
1308
- cl_program program, cl_uint spec_id, size_t spec_size,
1309
- const void *spec_value);
1310
-
1311
1392
pi_result piextProgramSetSpecializationConstant (pi_program prog,
1312
1393
pi_uint32 spec_id,
1313
1394
size_t spec_size,
@@ -1383,9 +1464,48 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
1383
1464
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
1384
1465
pi_result piTearDown(void *PluginParameter) {
  (void)PluginParameter;

  // Detach the global pointer first, then destroy the collection of extension
  // function caches it referred to. Deleting a null pointer is harmless, so
  // repeated calls are fine.
  ExtFuncCacheCollection *const Caches = ExtFuncCaches;
  ExtFuncCaches = nullptr;
  delete Caches;

  return PI_SUCCESS;
}
1388
1471
1472
+ pi_result piContextRelease (pi_context Context) {
1473
+ #define RELEASE_EXT_FUNCS_CACHE_INTEL (t_pfx ) \
1474
+ { \
1475
+ ExtFuncCache<t_pfx ## Name, t_pfx ## INTEL_fn> &Cache = ExtFuncCaches->get <t_pfx ## Name, t_pfx ## INTEL_fn>(); \
1476
+ std::lock_guard<std::mutex> CacheLock{Cache.Mtx }; \
1477
+ auto It = Cache.Cache .find (Context); \
1478
+ if (It != Cache.Cache .end ()) \
1479
+ Cache.Cache .erase (It); \
1480
+ }
1481
+ #define RELEASE_EXT_FUNCS_CACHE (t_pfx ) \
1482
+ { \
1483
+ ExtFuncCache<t_pfx ## Name, t_pfx ## _fn> &Cache = ExtFuncCaches->get <t_pfx ## Name, t_pfx ## _fn>(); \
1484
+ std::lock_guard<std::mutex> CacheLock{Cache.Mtx }; \
1485
+ auto It = Cache.Cache .find (Context); \
1486
+ if (It != Cache.Cache .end ()) \
1487
+ Cache.Cache .erase (It); \
1488
+ }
1489
+
1490
+
1491
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clHostMemAlloc);
1492
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clDeviceMemAlloc);
1493
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clSharedMemAlloc);
1494
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clCreateBufferWithProperties);
1495
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clMemBlockingFree);
1496
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clMemFree);
1497
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clSetKernelArgMemPointer);
1498
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clEnqueueMemset);
1499
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clEnqueueMemcpy);
1500
+ RELEASE_EXT_FUNCS_CACHE_INTEL (clGetMemAllocInfo);
1501
+ RELEASE_EXT_FUNCS_CACHE (clGetDeviceFunctionPointer);
1502
+ RELEASE_EXT_FUNCS_CACHE (clSetProgramSpecializationConstant);
1503
+ #undef RELEASE_EXT_FUNCS_CACHE
1504
+ #undef RELEASE_EXT_FUNCS_CACHE_INTEL
1505
+
1506
+ return cast<pi_result>(clReleaseContext (cast<cl_context>(Context)));
1507
+ }
1508
+
1389
1509
pi_result piPluginInit (pi_plugin *PluginInit) {
1390
1510
int CompareVersions = strcmp (PluginInit->PiVersion , SupportedVersion);
1391
1511
if (CompareVersions < 0 ) {
@@ -1397,6 +1517,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
1397
1517
// PI interface supports higher version or the same version.
1398
1518
strncpy (PluginInit->PluginVersion , SupportedVersion, 4 );
1399
1519
1520
+ ExtFuncCaches = new ExtFuncCacheCollection;
1521
+
1400
1522
#define _PI_CL (pi_api, ocl_api ) \
1401
1523
(PluginInit->PiFunctionTable ).pi_api = (decltype (&::pi_api))(&ocl_api);
1402
1524
@@ -1420,7 +1542,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
1420
1542
_PI_CL (piContextCreate, piContextCreate)
1421
1543
_PI_CL (piContextGetInfo, clGetContextInfo)
1422
1544
_PI_CL (piContextRetain, clRetainContext)
1423
- _PI_CL (piContextRelease, clReleaseContext )
1545
+ _PI_CL (piContextRelease, piContextRelease )
1424
1546
_PI_CL (piextContextGetNativeHandle, piextContextGetNativeHandle)
1425
1547
_PI_CL (piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle)
1426
1548
// Queue
0 commit comments