Skip to content

Commit 1377e4c

Browse files
Merge master into reuse_dpctl_pow
2 parents a4a514b + ff71682 commit 1377e4c

22 files changed

+694
-109
lines changed

.github/workflows/conda-package.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ env:
1515
test_arraycreation.py
1616
test_dot.py
1717
test_dparray.py
18+
test_copy.py
1819
test_fft.py
1920
test_linalg.py
2021
test_logic.py

doc/reference/math.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ Handling complex numbers
169169
dpnp.imag
170170
dpnp.conj
171171
dpnp.conjugate
172+
dpnp.proj
172173

173174

174175
Extrema Finding

doc/reference/ufunc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Math operations
4242
dpnp.log10
4343
dpnp.expm1
4444
dpnp.log1p
45+
dpnp.proj
4546
dpnp.sqrt
4647
dpnp.square
4748
dpnp.reciprocal

dpnp/backend/kernels/dpnp_krnl_bitwise.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,14 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
6868
sg.get_group_id()[0] * max_sg_size);
6969

7070
if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
71-
using multi_ptrT =
72-
sycl::multi_ptr<_DataType,
73-
sycl::access::address_space::global_space>;
71+
auto input_multi_ptr = sycl::address_space_cast<
72+
sycl::access::address_space::global_space,
73+
sycl::access::decorated::yes>(&input_data[start]);
74+
auto result_multi_ptr = sycl::address_space_cast<
75+
sycl::access::address_space::global_space,
76+
sycl::access::decorated::yes>(&result[start]);
7477

75-
sycl::vec<_DataType, vec_sz> x =
76-
sg.load<vec_sz>(multi_ptrT(&input_data[start]));
78+
sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(input_multi_ptr);
7779
sycl::vec<_DataType, vec_sz> res_vec;
7880

7981
if constexpr (std::is_same_v<_DataType, bool>) {
@@ -86,7 +88,7 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
8688
res_vec = ~x;
8789
}
8890

89-
sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
91+
sg.store<vec_sz>(result_multi_ptr, res_vec);
9092
}
9193
else {
9294
for (size_t k = start + sg.get_local_id()[0]; k < size;

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,8 +1326,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
13261326
{ \
13271327
constexpr size_t lws = 64; \
13281328
constexpr unsigned int vec_sz = 8; \
1329-
constexpr sycl::access::address_space global_space = \
1330-
sycl::access::address_space::global_space; \
13311329
\
13321330
auto gws_range = sycl::range<1>( \
13331331
((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * \
@@ -1344,12 +1342,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
13441342
\
13451343
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
13461344
result_size) { \
1347-
using input1_ptrT = \
1348-
sycl::multi_ptr<_DataType_input1, global_space>; \
1349-
using input2_ptrT = \
1350-
sycl::multi_ptr<_DataType_input2, global_space>; \
1351-
using result_ptrT = \
1352-
sycl::multi_ptr<_DataType_output, global_space>; \
1345+
auto input1_multi_ptr = sycl::address_space_cast< \
1346+
sycl::access::address_space::global_space, \
1347+
sycl::access::decorated::yes>( \
1348+
&input1_data[start]); \
1349+
auto input2_multi_ptr = sycl::address_space_cast< \
1350+
sycl::access::address_space::global_space, \
1351+
sycl::access::decorated::yes>( \
1352+
&input2_data[start]); \
1353+
auto result_multi_ptr = sycl::address_space_cast< \
1354+
sycl::access::address_space::global_space, \
1355+
sycl::access::decorated::yes>(&result[start]); \
13531356
\
13541357
sycl::vec<_DataType_output, vec_sz> res_vec; \
13551358
\
@@ -1363,11 +1366,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
13631366
_DataType_output>) \
13641367
{ \
13651368
sycl::vec<_DataType_input1, vec_sz> x1 = \
1366-
sg.load<vec_sz>( \
1367-
input1_ptrT(&input1_data[start])); \
1369+
sg.load<vec_sz>(input1_multi_ptr); \
13681370
sycl::vec<_DataType_input2, vec_sz> x2 = \
1369-
sg.load<vec_sz>( \
1370-
input2_ptrT(&input2_data[start])); \
1371+
sg.load<vec_sz>(input2_multi_ptr); \
13711372
\
13721373
res_vec = __vec_operation__; \
13731374
} \
@@ -1377,33 +1378,28 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
13771378
sycl::vec<_DataType_output, vec_sz> x1 = \
13781379
dpnp_vec_cast<_DataType_output, \
13791380
_DataType_input1, vec_sz>( \
1380-
sg.load<vec_sz>(input1_ptrT( \
1381-
&input1_data[start]))); \
1381+
sg.load<vec_sz>(input1_multi_ptr)); \
13821382
sycl::vec<_DataType_output, vec_sz> x2 = \
13831383
dpnp_vec_cast<_DataType_output, \
13841384
_DataType_input2, vec_sz>( \
1385-
sg.load<vec_sz>(input2_ptrT( \
1386-
&input2_data[start]))); \
1385+
sg.load<vec_sz>(input2_multi_ptr)); \
13871386
\
13881387
res_vec = __vec_operation__; \
13891388
} \
13901389
} \
13911390
else { \
13921391
sycl::vec<_DataType_input1, vec_sz> x1 = \
1393-
sg.load<vec_sz>( \
1394-
input1_ptrT(&input1_data[start])); \
1392+
sg.load<vec_sz>(input1_multi_ptr); \
13951393
sycl::vec<_DataType_input2, vec_sz> x2 = \
1396-
sg.load<vec_sz>( \
1397-
input2_ptrT(&input2_data[start])); \
1394+
sg.load<vec_sz>(input2_multi_ptr); \
13981395
\
13991396
for (size_t k = 0; k < vec_sz; ++k) { \
14001397
const _DataType_output input1_elem = x1[k]; \
14011398
const _DataType_output input2_elem = x2[k]; \
14021399
res_vec[k] = __operation__; \
14031400
} \
14041401
} \
1405-
sg.store<vec_sz>(result_ptrT(&result[start]), \
1406-
res_vec); \
1402+
sg.store<vec_sz>(result_multi_ptr, res_vec); \
14071403
} \
14081404
else { \
14091405
for (size_t k = start + sg.get_local_id()[0]; \

dpnp/backend/kernels/dpnp_krnl_logic.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -521,8 +521,6 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
521521
else { \
522522
constexpr size_t lws = 64; \
523523
constexpr unsigned int vec_sz = 8; \
524-
constexpr sycl::access::address_space global_space = \
525-
sycl::access::address_space::global_space; \
526524
\
527525
auto gws_range = sycl::range<1>( \
528526
((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \
@@ -537,22 +535,28 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
537535
\
538536
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
539537
result_size) { \
540-
sycl::vec<_DataType_input1, vec_sz> x1 = sg.load<vec_sz>( \
541-
sycl::multi_ptr<_DataType_input1, global_space>( \
542-
&input1_data[start])); \
543-
sycl::vec<_DataType_input2, vec_sz> x2 = sg.load<vec_sz>( \
544-
sycl::multi_ptr<_DataType_input2, global_space>( \
545-
&input2_data[start])); \
538+
auto input1_multi_ptr = sycl::address_space_cast< \
539+
sycl::access::address_space::global_space, \
540+
sycl::access::decorated::yes>(&input1_data[start]); \
541+
auto input2_multi_ptr = sycl::address_space_cast< \
542+
sycl::access::address_space::global_space, \
543+
sycl::access::decorated::yes>(&input2_data[start]); \
544+
auto result_multi_ptr = sycl::address_space_cast< \
545+
sycl::access::address_space::global_space, \
546+
sycl::access::decorated::yes>(&result[start]); \
547+
\
548+
sycl::vec<_DataType_input1, vec_sz> x1 = \
549+
sg.load<vec_sz>(input1_multi_ptr); \
550+
sycl::vec<_DataType_input2, vec_sz> x2 = \
551+
sg.load<vec_sz>(input2_multi_ptr); \
546552
sycl::vec<bool, vec_sz> res_vec; \
547553
\
548554
for (size_t k = 0; k < vec_sz; ++k) { \
549555
const _DataType_input1 input1_elem = x1[k]; \
550556
const _DataType_input2 input2_elem = x2[k]; \
551557
res_vec[k] = __operation__; \
552558
} \
553-
sg.store<vec_sz>( \
554-
sycl::multi_ptr<bool, global_space>(&result[start]), \
555-
res_vec); \
559+
sg.store<vec_sz>(result_multi_ptr, res_vec); \
556560
} \
557561
else { \
558562
for (size_t k = start; k < result_size; ++k) { \

dpnp/backend/kernels/dpnp_krnl_mathematical.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,6 @@ DPCTLSyclEventRef
151151

152152
constexpr size_t lws = 64;
153153
constexpr unsigned int vec_sz = 8;
154-
constexpr sycl::access::address_space global_space =
155-
sycl::access::address_space::global_space;
156154

157155
auto gws_range =
158156
sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
@@ -166,18 +164,20 @@ DPCTLSyclEventRef
166164
sg.get_group_id()[0] * max_sg_size);
167165

168166
if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
169-
using input_ptrT =
170-
sycl::multi_ptr<_DataType_input, global_space>;
171-
using result_ptrT =
172-
sycl::multi_ptr<_DataType_output, global_space>;
167+
auto array_multi_ptr = sycl::address_space_cast<
168+
sycl::access::address_space::global_space,
169+
sycl::access::decorated::yes>(&array1[start]);
170+
auto result_multi_ptr = sycl::address_space_cast<
171+
sycl::access::address_space::global_space,
172+
sycl::access::decorated::yes>(&result[start]);
173173

174174
sycl::vec<_DataType_input, vec_sz> data_vec =
175-
sg.load<vec_sz>(input_ptrT(&array1[start]));
175+
sg.load<vec_sz>(array_multi_ptr);
176176

177177
sycl::vec<_DataType_output, vec_sz> res_vec =
178178
sycl::abs(data_vec);
179179

180-
sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);
180+
sg.store<vec_sz>(result_multi_ptr, res_vec);
181181
}
182182
else {
183183
for (size_t k = start + sg.get_local_id()[0]; k < size;

dpnp/dpnp_algo/dpnp_algo_mathematical.pxi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ cpdef tuple dpnp_modf(utils.dpnp_descriptor x1):
351351

352352

353353
cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):
354-
cur_x1 = dpnp_copy(x1).get_pyobj()
354+
cur_x1 = x1.get_pyobj().copy()
355355

356356
cur_x1_flatiter = cur_x1.flat
357357

@@ -364,7 +364,7 @@ cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):
364364

365365

366366
cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1):
367-
cur_x1 = dpnp_copy(x1).get_pyobj()
367+
cur_x1 = x1.get_pyobj().copy()
368368

369369
cur_x1_flatiter = cur_x1.flat
370370

dpnp/dpnp_algo/dpnp_elementwise_common.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,15 @@
6868
"dpnp_logical_or",
6969
"dpnp_logical_xor",
7070
"dpnp_multiply",
71+
"dpnp_negative",
7172
"dpnp_not_equal",
7273
"dpnp_power",
74+
"dpnp_proj",
7375
"dpnp_remainder",
7476
"dpnp_right_shift",
7577
"dpnp_round",
78+
"dpnp_sign",
79+
"dpnp_signbit",
7680
"dpnp_sin",
7781
"dpnp_sqrt",
7882
"dpnp_square",
@@ -1522,6 +1526,43 @@ def dpnp_power(x1, x2, out=None, order="K"):
15221526
return dpnp_array._create_from_usm_ndarray(res_usm)
15231527

15241528

1529+
_proj_docstring = """
1530+
proj(x, out=None, order="K")
1531+
1532+
Computes projection of each element `x_i` for input array `x`.
1533+
1534+
Args:
1535+
x (dpnp.ndarray):
1536+
Input array, expected to have numeric data type.
1537+
out ({None, dpnp.ndarray}, optional):
1538+
Output array to populate.
1539+
Array must have the correct shape and the expected data type.
1540+
order ("C","F","A","K", optional):
1541+
Memory layout of the newly created output array, if parameter `out` is `None`.
1542+
Default: "K".
1543+
Returns:
1544+
dpnp.ndarray:
1545+
An array containing the element-wise projection.
1546+
The returned array has the same data type as `x`.
1547+
"""
1548+
1549+
1550+
proj_func = UnaryElementwiseFunc(
1551+
"proj", ti._proj_result_type, ti._proj, _proj_docstring
1552+
)
1553+
1554+
1555+
def dpnp_proj(x, out=None, order="K"):
1556+
"""Invokes proj() from dpctl.tensor implementation for proj() function."""
1557+
1558+
# dpctl.tensor only works with usm_ndarray
1559+
x1_usm = dpnp.get_usm_ndarray(x)
1560+
out_usm = None if out is None else dpnp.get_usm_ndarray(out)
1561+
1562+
res_usm = proj_func(x1_usm, out=out_usm, order=order)
1563+
return dpnp_array._create_from_usm_ndarray(res_usm)
1564+
1565+
15251566
_remainder_docstring_ = """
15261567
remainder(x1, x2, out=None, order='K')
15271568
Calculates the remainder of division for each element `x1_i` of the input array
@@ -1708,6 +1749,44 @@ def dpnp_sign(x, out=None, order="K"):
17081749
return dpnp_array._create_from_usm_ndarray(res_usm)
17091750

17101751

1752+
_signbit_docstring = """
1753+
signbit(x, out=None, order="K")
1754+
1755+
Computes an indication of whether the sign bit of each element `x_i` of
1756+
input array `x` is set.
1757+
1758+
Args:
1759+
x (dpnp.ndarray):
1760+
Input array, expected to have numeric data type.
1761+
out ({None, dpnp.ndarray}, optional):
1762+
Output array to populate.
1763+
Array must have the correct shape and the expected data type.
1764+
order ("C","F","A","K", optional):
1765+
Memory layout of the newly created output array, if parameter `out` is `None`.
1766+
Default: "K".
1767+
Returns:
1768+
dpnp.ndarray:
1769+
An array containing the element-wise results. The returned array
1770+
has a data type of `bool`.
1771+
"""
1772+
1773+
1774+
signbit_func = UnaryElementwiseFunc(
1775+
"signbit", ti._signbit_result_type, ti._signbit, _signbit_docstring
1776+
)
1777+
1778+
1779+
def dpnp_signbit(x, out=None, order="K"):
1780+
"""Invokes signbit() from dpctl.tensor implementation for signbit() function."""
1781+
1782+
# dpctl.tensor only works with usm_ndarray
1783+
x1_usm = dpnp.get_usm_ndarray(x)
1784+
out_usm = None if out is None else dpnp.get_usm_ndarray(out)
1785+
1786+
res_usm = signbit_func(x1_usm, out=out_usm, order=order)
1787+
return dpnp_array._create_from_usm_ndarray(res_usm)
1788+
1789+
17111790
_sin_docstring = """
17121791
sin(x, out=None, order='K')
17131792
Computes sine for each element `x_i` of input array `x`.

0 commit comments

Comments
 (0)