@@ -81,99 +81,90 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
81
81
cl_program program, cl_uint spec_id, size_t spec_size,
82
82
const void *spec_value);
83
83
84
// Per-context storage for cached extension function pointers. A forward
// declaration is enough for the accessor declared in namespace detail.
struct ExtFuncsPerContextT;

namespace detail {
// Primary template; only declared here. It returns references to the cached
// function pointer slot of type FuncT and to the flag that records whether
// that slot has been initialized. Explicit specializations are generated
// further down from ext_functions.inc.
template <const char *FuncName, typename FuncT>
std::pair<FuncT &, bool &> get(ExtFuncsPerContextT &Funcs);
} // namespace detail
101
90
102
- struct ExtFuncCacheCollection {
91
+ struct ExtFuncsPerContextT {
92
+ #define _EXT_FUNCTION_INTEL (t_pfx ) \
93
+ t_pfx##INTEL_fn t_pfx##Func = nullptr ; \
94
+ bool t_pfx##Initialized = false ;
95
+
96
+ #define _EXT_FUNCTION (t_pfx ) \
97
+ t_pfx##_fn t_pfx##Func = nullptr ; \
98
+ bool t_pfx##Initialized = false ;
99
+
100
+ #include " ext_functions.inc"
101
+
102
+ #undef _EXT_FUNCTION
103
+ #undef _EXT_FUNCTION_INTEL
104
+
105
+ std::mutex Mtx;
106
+
103
107
template <const char *FuncName, typename FuncT>
104
- ExtFuncCache<FuncName, FuncT> & get () {
108
+ std::pair<FuncT &, bool &> get () {
105
109
return detail::get<FuncName, FuncT>(*this );
106
110
}
107
-
108
- #define DEFINE_INTEL (t_pfx ) \
109
- ExtFuncCache<t_pfx##Name, t_pfx##INTEL_fn> t_pfx##_Cache
110
- #define DEFINE (t_pfx ) ExtFuncCache<t_pfx##Name, t_pfx##_fn> t_pfx##_Cache
111
-
112
- DEFINE_INTEL (clHostMemAlloc);
113
- DEFINE_INTEL (clDeviceMemAlloc);
114
- DEFINE_INTEL (clSharedMemAlloc);
115
- DEFINE_INTEL (clCreateBufferWithProperties);
116
- DEFINE_INTEL (clMemBlockingFree);
117
- DEFINE_INTEL (clMemFree);
118
- DEFINE_INTEL (clSetKernelArgMemPointer);
119
- DEFINE_INTEL (clEnqueueMemset);
120
- DEFINE_INTEL (clEnqueueMemcpy);
121
- DEFINE_INTEL (clGetMemAllocInfo);
122
- DEFINE (clGetDeviceFunctionPointer);
123
- DEFINE (clSetProgramSpecializationConstant);
124
- #undef DEFINE
125
- #undef DEFINE_INTEL
126
111
};
127
112
128
113
namespace detail {
129
- #define DEFINE_GETTER_INTEL (t_pfx ) \
114
+ #define _EXT_FUNCTION_INTEL (t_pfx ) \
130
115
template <> \
131
- ExtFuncCache<t_pfx##Name, t_pfx##INTEL_fn> \
132
- &get<t_pfx##Name, t_pfx##INTEL_fn>(::ExtFuncCacheCollection & C) { \
133
- return C.t_pfx ##_Cache; \
116
+ std::pair<t_pfx##INTEL_fn &, bool &> get<t_pfx##Name, t_pfx##INTEL_fn>( \
117
+ ExtFuncsPerContextT &Funcs) { \
118
+ using FPtrT = t_pfx##INTEL_fn; \
119
+ std::pair<FPtrT &, bool &> Ret{ \
120
+ Funcs.t_pfx ##Func, Funcs.t_pfx ##Initialized}; \
121
+ return Ret; \
134
122
}
135
- #define DEFINE_GETTER (t_pfx ) \
123
+
124
+ #define _EXT_FUNCTION (t_pfx ) \
136
125
template <> \
137
- ExtFuncCache<t_pfx##Name, t_pfx##_fn> &get<t_pfx##Name, t_pfx##_fn>( \
138
- ::ExtFuncCacheCollection & C) { \
139
- return C.t_pfx ##_Cache; \
126
+ std::pair<t_pfx##_fn &, bool &> get<t_pfx##Name, t_pfx##_fn>( \
127
+ ExtFuncsPerContextT &Funcs) { \
128
+ using FPtrT = t_pfx##_fn; \
129
+ std::pair<FPtrT &, bool &> Ret{ \
130
+ Funcs.t_pfx ##Func, Funcs.t_pfx ##Initialized}; \
131
+ return Ret; \
140
132
}
141
133
142
- DEFINE_GETTER_INTEL (clHostMemAlloc)
143
- DEFINE_GETTER_INTEL (clDeviceMemAlloc)
144
- DEFINE_GETTER_INTEL (clSharedMemAlloc)
145
- DEFINE_GETTER_INTEL (clCreateBufferWithProperties)
146
- DEFINE_GETTER_INTEL (clMemBlockingFree)
147
- DEFINE_GETTER_INTEL (clMemFree)
148
- DEFINE_GETTER_INTEL (clSetKernelArgMemPointer)
149
- DEFINE_GETTER_INTEL (clEnqueueMemset)
150
- DEFINE_GETTER_INTEL (clEnqueueMemcpy)
151
- DEFINE_GETTER_INTEL (clGetMemAllocInfo)
152
- DEFINE_GETTER (clGetDeviceFunctionPointer)
153
- DEFINE_GETTER (clSetProgramSpecializationConstant)
154
- #undef DEFINE_GETTER
155
- #undef DEFINE_GETTER_INTEL
134
+ #include " ext_functions.inc"
135
+
136
+ #undef _EXT_FUNCTION
137
+ #undef _EXT_FUNCTION_INTEL
156
138
} // namespace detail
157
139
158
- ExtFuncCacheCollection *ExtFuncCaches = nullptr ;
140
+ struct ExtFuncsCachesT {
141
+ std::map<pi_context, ExtFuncsPerContextT> Caches;
142
+ std::mutex Mtx;
143
+ };
144
+
145
+ ExtFuncsCachesT *ExtFuncsCaches = nullptr ;
159
146
160
147
// USM helper function to get an extension function pointer
161
148
template <const char *FuncName, typename T>
162
149
static pi_result getExtFuncFromContext (pi_context context, T *fptr) {
163
150
// TODO
164
151
// Potentially redo caching as PI interface changes.
165
- ExtFuncCache<FuncName, T> &Cache = ExtFuncCaches->get <FuncName, T>();
152
+ ExtFuncsPerContextT *PerContext = nullptr ;
153
+ {
154
+ std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx };
166
155
167
- std::lock_guard<std::mutex> CacheLock{Cache.Mtx };
156
+ PerContext = &ExtFuncsCaches->Caches [context];
157
+ }
168
158
169
- auto It = Cache.Cache .find (context);
159
+ std::lock_guard<std::mutex> Lock{PerContext->Mtx };
160
+ std::pair<T &, bool &> FuncInitialized = PerContext->get <FuncName, T>();
170
161
171
162
// if cached, return cached FuncPtr
172
- if (It != Cache. Cache . end () ) {
163
+ if (FuncInitialized. second ) {
173
164
// if cached that extension is not available return nullptr and
174
165
// PI_INVALID_VALUE
175
- *fptr = It-> second ;
176
- return It-> second ? PI_SUCCESS : PI_INVALID_VALUE;
166
+ *fptr = FuncInitialized. first ;
167
+ return *fptr ? PI_SUCCESS : PI_INVALID_VALUE;
177
168
}
178
169
179
170
cl_uint deviceCount;
@@ -207,12 +198,14 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
207
198
208
199
if (!FuncPtr) {
209
200
// Cache that the extension is not available
210
- Cache.Cache [context] = nullptr ;
201
+ FuncInitialized.first = nullptr ;
202
+ FuncInitialized.second = true ;
211
203
return PI_INVALID_VALUE;
212
204
}
213
205
206
+ FuncInitialized.first = FuncPtr;
207
+ FuncInitialized.second = true ;
214
208
*fptr = FuncPtr;
215
- Cache.Cache [context] = FuncPtr;
216
209
217
210
return cast<pi_result>(ret_err);
218
211
}
@@ -1466,45 +1459,17 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel,
1466
1459
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
1467
1460
pi_result piTearDown(void *PluginParameter) {
  // The parameter is not used by this implementation.
  static_cast<void>(PluginParameter);

  // Destroy the extension function caches and reset the global pointer so
  // that it does not dangle afterwards.
  delete ExtFuncsCaches;
  ExtFuncsCaches = nullptr;

  return PI_SUCCESS;
}
1473
1466
1474
1467
// Drops the extension function pointers cached for Context and then calls
// clReleaseContext on the underlying OpenCL context.
//
// Returns the clReleaseContext result converted to pi_result.
pi_result piContextRelease(pi_context Context) {
  // The cache object only exists between piPluginInit and piTearDown (the
  // pointer is nullptr otherwise). Skip the cache cleanup instead of
  // dereferencing a null pointer when a context is released outside of that
  // window; the context itself is still released below.
  if (ExtFuncsCaches) {
    std::lock_guard<std::mutex> Lock{ExtFuncsCaches->Mtx};

    // erase-by-key is a no-op when nothing was cached for this context.
    ExtFuncsCaches->Caches.erase(Context);
  }

  return cast<pi_result>(clReleaseContext(cast<cl_context>(Context)));
}
@@ -1520,7 +1485,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
1520
1485
// PI interface supports higher version or the same version.
1521
1486
strncpy (PluginInit->PluginVersion , SupportedVersion, 4 );
1522
1487
1523
- ExtFuncCaches = new ExtFuncCacheCollection ;
1488
+ ExtFuncsCaches = new ExtFuncsCachesT ;
1524
1489
1525
1490
#define _PI_CL (pi_api, ocl_api ) \
1526
1491
(PluginInit->PiFunctionTable ).pi_api = (decltype (&::pi_api))(&ocl_api);
0 commit comments