Skip to content

Commit 07bee6d

Browse files
authored
ELEMWISE 1arg_2types add MKL kernels (#130)
* ELEMWISE 1arg_2types add MKL kernels
1 parent 1a0081b commit 07bee6d

File tree

3 files changed

+65
-31
lines changed

3 files changed

+65
-31
lines changed

dpnp/backend/backend_iface.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,7 @@ INP_DLLEXPORT void custom_var_c(
402402

403403
#include <custom_1arg_1type_tbl.hpp>
404404

405-
#define MACRO_CUSTOM_1ARG_2TYPES_OP(__name__, __operation__) \
405+
#define MACRO_CUSTOM_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \
406406
template <typename _DataType_input, typename _DataType_output> \
407407
INP_DLLEXPORT void __name__(void* array1, void* result1, size_t size);
408408

dpnp/backend/custom_1arg_2type_tbl.hpp

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,39 +31,65 @@
3131
* Parameters:
3232
* - public name of the function and kernel name
3333
* - SYCL operation applied to each element (input_elem) to calculate the result
34+
* - MKL operation used to calculate the result when the input type is float or double
3435
*
3536
*/
3637

3738
#ifndef MACRO_CUSTOM_1ARG_2TYPES_OP
3839
#error "MACRO_CUSTOM_1ARG_2TYPES_OP is not defined"
3940
#endif
4041

41-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_acos_c, cl::sycl::acos(input_elem))
42-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_acosh_c, cl::sycl::acosh(input_elem))
43-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_asin_c, cl::sycl::asin(input_elem))
44-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_asinh_c, cl::sycl::asinh(input_elem))
45-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_atan_c, cl::sycl::atan(input_elem))
46-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_atanh_c, cl::sycl::atanh(input_elem))
47-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cbrt_c, cl::sycl::cbrt(input_elem))
48-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_ceil_c, cl::sycl::ceil(input_elem))
49-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cos_c, cl::sycl::cos(input_elem))
50-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cosh_c, cl::sycl::cosh(input_elem))
51-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_degrees_c, cl::sycl::degrees(input_elem))
52-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_exp2_c, cl::sycl::exp2(input_elem))
53-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_exp_c, cl::sycl::exp(input_elem))
54-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_expm1_c, cl::sycl::expm1(input_elem))
55-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_fabs_c, cl::sycl::fabs(input_elem))
56-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_floor_c, cl::sycl::floor(input_elem))
57-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log10_c, cl::sycl::log10(input_elem))
58-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log1p_c, cl::sycl::log1p(input_elem))
59-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log2_c, cl::sycl::log2(input_elem))
60-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log_c, cl::sycl::log(input_elem))
61-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_radians_c, cl::sycl::radians(input_elem))
62-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sin_c, cl::sycl::sin(input_elem))
63-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sinh_c, cl::sycl::sinh(input_elem))
64-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sqrt_c, cl::sycl::sqrt(input_elem))
65-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_tan_c, cl::sycl::tan(input_elem))
66-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_tanh_c, cl::sycl::tanh(input_elem))
67-
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_trunc_c, cl::sycl::trunc(input_elem))
42+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_acos_c, cl::sycl::acos(input_elem),
43+
oneapi::mkl::vm::acos(DPNP_QUEUE, size, array1, result))
44+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_acosh_c, cl::sycl::acosh(input_elem),
45+
oneapi::mkl::vm::acosh(DPNP_QUEUE, size, array1, result))
46+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_asin_c, cl::sycl::asin(input_elem),
47+
oneapi::mkl::vm::asin(DPNP_QUEUE, size, array1, result))
48+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_asinh_c, cl::sycl::asinh(input_elem),
49+
oneapi::mkl::vm::asinh(DPNP_QUEUE, size, array1, result))
50+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_atan_c, cl::sycl::atan(input_elem),
51+
oneapi::mkl::vm::atan(DPNP_QUEUE, size, array1, result))
52+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_atanh_c, cl::sycl::atanh(input_elem),
53+
oneapi::mkl::vm::atanh(DPNP_QUEUE, size, array1, result))
54+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cbrt_c, cl::sycl::cbrt(input_elem),
55+
oneapi::mkl::vm::cbrt(DPNP_QUEUE, size, array1, result))
56+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_ceil_c, cl::sycl::ceil(input_elem),
57+
oneapi::mkl::vm::ceil(DPNP_QUEUE, size, array1, result))
58+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cos_c, cl::sycl::cos(input_elem),
59+
oneapi::mkl::vm::cos(DPNP_QUEUE, size, array1, result))
60+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_cosh_c, cl::sycl::cosh(input_elem),
61+
oneapi::mkl::vm::cosh(DPNP_QUEUE, size, array1, result))
62+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_degrees_c, cl::sycl::degrees(input_elem), DPNP_QUEUE.submit(kernel_func))
63+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_exp2_c, cl::sycl::exp2(input_elem),
64+
oneapi::mkl::vm::exp2(DPNP_QUEUE, size, array1, result))
65+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_exp_c, cl::sycl::exp(input_elem),
66+
oneapi::mkl::vm::exp(DPNP_QUEUE, size, array1, result))
67+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_expm1_c, cl::sycl::expm1(input_elem),
68+
oneapi::mkl::vm::expm1(DPNP_QUEUE, size, array1, result))
69+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_fabs_c, cl::sycl::fabs(input_elem),
70+
oneapi::mkl::vm::abs(DPNP_QUEUE, size, array1, result))
71+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_floor_c, cl::sycl::floor(input_elem),
72+
oneapi::mkl::vm::floor(DPNP_QUEUE, size, array1, result))
73+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log10_c, cl::sycl::log10(input_elem),
74+
oneapi::mkl::vm::log10(DPNP_QUEUE, size, array1, result))
75+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log1p_c, cl::sycl::log1p(input_elem),
76+
oneapi::mkl::vm::log1p(DPNP_QUEUE, size, array1, result))
77+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log2_c, cl::sycl::log2(input_elem),
78+
oneapi::mkl::vm::log2(DPNP_QUEUE, size, array1, result))
79+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_log_c, cl::sycl::log(input_elem),
80+
oneapi::mkl::vm::ln(DPNP_QUEUE, size, array1, result))
81+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_radians_c, cl::sycl::radians(input_elem), DPNP_QUEUE.submit(kernel_func))
82+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sin_c, cl::sycl::sin(input_elem),
83+
oneapi::mkl::vm::sin(DPNP_QUEUE, size, array1, result))
84+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sinh_c, cl::sycl::sinh(input_elem),
85+
oneapi::mkl::vm::sinh(DPNP_QUEUE, size, array1, result))
86+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_sqrt_c, cl::sycl::sqrt(input_elem),
87+
oneapi::mkl::vm::sqrt(DPNP_QUEUE, size, array1, result))
88+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_tan_c, cl::sycl::tan(input_elem),
89+
oneapi::mkl::vm::tan(DPNP_QUEUE, size, array1, result))
90+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_tanh_c, cl::sycl::tanh(input_elem),
91+
oneapi::mkl::vm::tanh(DPNP_QUEUE, size, array1, result))
92+
MACRO_CUSTOM_1ARG_2TYPES_OP(dpnp_trunc_c, cl::sycl::trunc(input_elem),
93+
oneapi::mkl::vm::trunc(DPNP_QUEUE, size, array1, result))
6894

6995
#undef MACRO_CUSTOM_1ARG_2TYPES_OP

dpnp/backend/custom_kernels_elemwise.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,15 @@
3131
#include "backend_utils.hpp"
3232
#include "queue_sycl.hpp"
3333

34-
#define MACRO_CUSTOM_1ARG_2TYPES_OP(__name__, __operation__) \
34+
#define MACRO_CUSTOM_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \
3535
template <typename _KernelNameSpecialization> \
3636
class __name__##_kernel; \
3737
\
3838
template <typename _DataType_input, typename _DataType_output> \
3939
void __name__(void* array1_in, void* result1, size_t size) \
4040
{ \
4141
cl::sycl::event event; \
42+
\
4243
_DataType_input* array1 = reinterpret_cast<_DataType_input*>(array1_in); \
4344
_DataType_output* result = reinterpret_cast<_DataType_output*>(result1); \
4445
\
@@ -47,15 +48,22 @@
4748
size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/ \
4849
{ \
4950
_DataType_output input_elem = array1[i]; \
50-
result[i] = __operation__; \
51+
result[i] = __operation1__; \
5152
} \
5253
}; \
5354
\
5455
auto kernel_func = [&](cl::sycl::handler& cgh) { \
5556
cgh.parallel_for<class __name__##_kernel<_DataType_input>>(gws, kernel_parallel_for_func); \
5657
}; \
5758
\
58-
event = DPNP_QUEUE.submit(kernel_func); \
59+
if constexpr (std::is_same<_DataType_input, double>::value || std::is_same<_DataType_input, float>::value) \
60+
{ \
61+
event = __operation2__; \
62+
} \
63+
else \
64+
{ \
65+
event = DPNP_QUEUE.submit(kernel_func); \
66+
} \
5967
\
6068
event.wait(); \
6169
}

0 commit comments

Comments
 (0)