@@ -186,114 +186,69 @@ template void custom_blas_dot_c<long>(void* array1_in, void* array2_in, void* re
 template void custom_blas_dot_c<float>(void* array1_in, void* array2_in, void* result1, size_t size);
 template void custom_blas_dot_c<double>(void* array1_in, void* array2_in, void* result1, size_t size);

-template <typename _DataType>
-void custom_lapack_syevd_c(void* array_in, void* result1, size_t size)
+template <typename _DataType, typename _ResultType>
+void custom_lapack_eig_c(const void* array_in, void* result1, void* result2, size_t size)
 {
+    // TODO this kernel works with square 2-D array only
+
+    // Kernel Type for calculation is double type
+    // because interface requires float type but calculations are expected in double type
+
     if (!size)
     {
         return;
     }

-    _DataType* array = reinterpret_cast<_DataType*>(array_in);
-    _DataType* result = reinterpret_cast<_DataType*>(result1);
+    cl::sycl::event event;

-    if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value)
-    {
-        cl::sycl::event event;
+    const _DataType* array = reinterpret_cast<const _DataType*>(array_in);
+    _ResultType* result_val = reinterpret_cast<_ResultType*>(result1);
+    _ResultType* result_vec = reinterpret_cast<_ResultType*>(result2);

-        _DataType* syevd_array = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * size * sizeof(_DataType)));
-        dpnp_memory_memcpy_c(syevd_array, array, size * size * sizeof(_DataType));
+    double* result_val_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * sizeof(double)));
+    double* result_vec_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * size * sizeof(double)));

-        const std::int64_t lda = std::max<size_t>(1UL, size);
+    // type conversion. Also, math library requires copy memory because override
+    for (size_t it = 0; it < (size * size); ++it)
+    {
+        result_vec_kern[it] = array[it];
+    }

-        const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<_DataType>(
-            DPNP_QUEUE, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda);
+    const std::int64_t lda = std::max<size_t>(1UL, size);

-        _DataType* scratchpad = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(scratchpad_size * sizeof(_DataType)));
+    const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<double>(
+        DPNP_QUEUE, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda);

-        event = mkl_lapack::syevd(DPNP_QUEUE,               // queue
-                                  oneapi::mkl::job::vec,    // jobz
-                                  oneapi::mkl::uplo::upper, // uplo
-                                  size,                     // The order of the matrix A (0≤n)
-                                  syevd_array,              // will be overwritten with eigenvectors
-                                  lda,
-                                  result,
-                                  scratchpad,
-                                  scratchpad_size);
-        event.wait();
+    double* scratchpad = reinterpret_cast<double*>(dpnp_memory_alloc_c(scratchpad_size * sizeof(double)));

-        dpnp_memory_free_c(scratchpad);
+    event = mkl_lapack::syevd(DPNP_QUEUE,               // queue
+                              oneapi::mkl::job::vec,    // jobz
+                              oneapi::mkl::uplo::upper, // uplo
+                              size,                     // The order of the matrix A (0 <= n)
+                              result_vec_kern,          // will be overwritten with eigenvectors
+                              lda,
+                              result_val_kern,
+                              scratchpad,
+                              scratchpad_size);
+    event.wait();

-        custom_elemwise_transpose_c<_DataType>(
-            syevd_array, {(long)size, (long)size}, {(long)size, (long)size}, {1, 0}, array, size * size);
+    dpnp_memory_free_c(scratchpad);

-        dpnp_memory_free_c(syevd_array);
-    }
-    else
+    for (size_t it1 = 0; it1 < size; ++it1)
     {
-        // TODO: implement SYCL kernel for int/long input
+        result_val[it1] = result_val_kern[it1];
+        for (size_t it2 = 0; it2 < size; ++it2)
+        {
+            // copy + transpose
+            result_vec[it2 * size + it1] = result_vec_kern[it1 * size + it2];
+        }
     }
-}

-template void custom_lapack_syevd_c<int>(void* array1_in, void* result1, size_t size);
-template void custom_lapack_syevd_c<long>(void* array1_in, void* result1, size_t size);
-template void custom_lapack_syevd_c<float>(void* array1_in, void* result1, size_t size);
-template void custom_lapack_syevd_c<double>(void* array1_in, void* result1, size_t size);
-
-#if 0 // Example for OpenCL kernel
-#include <map>
-#include <typeindex>
-
-static std::map<std::type_index, std::string> types_map = {{typeid(long), "long"}, {typeid(int), "int"}};
-
-static const char* blas_gemm_naive =
-    "//#define __KERNEL_TYPE__ long \n"
-    "#define __KERNEL_TYPE_ZERO__ 0 \n"
-    "__kernel void blas_gemm_naive(__global __KERNEL_TYPE__* array_1, \n"
-    "                              __global __KERNEL_TYPE__* array_2, \n"
-    "                              __global __KERNEL_TYPE__* result, \n"
-    "                              unsigned long size) \n"
-    "{ \n"
-    "    size_t i = get_global_id(0); //for (size_t i = 0; i < size; ++i) \n"
-    "    { \n"
-    "        size_t j = get_global_id(1); //for (size_t j = 0; j < size; ++j) \n"
-    "        { \n"
-    "            __KERNEL_TYPE__ temp = __KERNEL_TYPE_ZERO__; \n"
-    "            for (size_t k = 0; k < size; ++k) \n"
-    "            { \n"
-    "                const size_t index_1 = i * size + k; \n"
-    "                const size_t index_2 = k * size + j; \n"
-    "                temp += array_1[index_1] * array_2[index_2]; \n"
-    "            } \n"
-    " \n"
-    "            const size_t index_result = i * size + j; \n"
-    "            result[index_result] = temp; \n"
-    "        } \n"
-    "    } \n"
-    "} \n";
-
-template <typename _DataType>
-void custom_dgemm_c_opencl(void* array_1_in, void* array_2_in, void* result_1, size_t size)
-{
-    _DataType* array_1 = reinterpret_cast<_DataType*>(array_1_in);
-    _DataType* array_2 = reinterpret_cast<_DataType*>(array_2_in);
-    _DataType* result = reinterpret_cast<_DataType*>(result_1);
-
-    std::string compile_time_options("-cl-std=CL1.2");
-    compile_time_options += " -D__KERNEL_TYPE__=" + types_map.at(typeid(_DataType));
-
-    cl::sycl::program program_src(DPNP_QUEUE.get_context());
-    program_src.build_with_source(blas_gemm_naive, compile_time_options);
-
-    cl::sycl::range<2> kernel_work_ids(size, size); // dimensions are: "i" and "j"
-    DPNP_QUEUE.submit([&](cl::sycl::handler& cgh) {
-        cgh.set_args(array_1, array_2, result, size);
-        cgh.parallel_for(kernel_work_ids, program_src.get_kernel("blas_gemm_naive"));
-    });
-
-    DPNP_QUEUE.wait();
+    dpnp_memory_free_c(result_val_kern);
+    dpnp_memory_free_c(result_vec_kern);
 }

-template void custom_dgemm_c_opencl<long>(void* array_1_in, void* array_2_in, void* result_1, size_t size);
-
-#endif
+template void custom_lapack_eig_c<int, double>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<long, double>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<float, float>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<double, double>(const void* array_in, void* result1, void* result2, size_t size);
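Below is a minimal caller sketch, not part of the diff, showing how the new custom_lapack_eig_c entry point might be exercised for a small symmetric matrix. It assumes a backend header declaring custom_lapack_eig_c, dpnp_memory_alloc_c and dpnp_memory_free_c (the header name used here is a guess), and that dpnp_memory_alloc_c returns host-accessible (USM shared) memory. The 3x3 test matrix is illustrative only. As the copy-back loop in the kernel shows, result1 receives the eigenvalues and result2 the transposed eigenvectors.

// Hypothetical usage sketch; header name and host-accessibility of
// dpnp_memory_alloc_c allocations are assumptions, not taken from the change.
#include <cstdio>

#include "backend_iface.hpp" // assumed header exposing the kernels used below

int main()
{
    const size_t n = 3; // the kernel expects a square 2-D input

    // Buffers allocated through the backend allocator used by the kernel itself
    double* a    = reinterpret_cast<double*>(dpnp_memory_alloc_c(n * n * sizeof(double)));
    double* vals = reinterpret_cast<double*>(dpnp_memory_alloc_c(n * sizeof(double)));
    double* vecs = reinterpret_cast<double*>(dpnp_memory_alloc_c(n * n * sizeof(double)));

    // Symmetric input matrix, row-major
    const double init[n * n] = {2.0, 1.0, 0.0,
                                1.0, 2.0, 1.0,
                                0.0, 1.0, 2.0};
    for (size_t i = 0; i < n * n; ++i)
    {
        a[i] = init[i];
    }

    // result1 = eigenvalues, result2 = eigenvectors (copied back transposed)
    custom_lapack_eig_c<double, double>(a, vals, vecs, n);

    for (size_t i = 0; i < n; ++i)
    {
        printf("lambda[%zu] = %f\n", i, vals[i]);
    }

    dpnp_memory_free_c(a);
    dpnp_memory_free_c(vals);
    dpnp_memory_free_c(vecs);

    return 0;
}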