Skip to content

Commit b08ddfd

Browse files
Merge master into impl_nan_to_num
2 parents b20ce16 + 808b976 commit b08ddfd

37 files changed

+1377
-349
lines changed

.github/workflows/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* @antonwolfy @npolina4 @vlad-perevezentsev @vtavana

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ repos:
9696
[
9797
"-rn", # Only display messages
9898
"-sn", # Don't display the score
99+
"--disable=c-extension-no-member",
99100
"--disable=import-error",
100101
"--disable=redefined-builtin",
101102
"--disable=unused-wildcard-import"

doc/reference/logic.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Comparison
6868
dpnp.allclose
6969
dpnp.isclose
7070
dpnp.array_equal
71+
dpnp.array_equiv
7172
dpnp.greater
7273
dpnp.greater_equal
7374
dpnp.less

doc/reference/ufunc.rst

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,27 @@ Math operations
2020
dpnp.add
2121
dpnp.subtract
2222
dpnp.multiply
23+
dpnp.matmul
2324
dpnp.divide
2425
dpnp.logaddexp
2526
dpnp.logaddexp2
2627
dpnp.true_divide
2728
dpnp.floor_divide
2829
dpnp.negative
30+
dpnp.positive
2931
dpnp.power
32+
dpnp.float_power
3033
dpnp.remainder
3134
dpnp.mod
3235
dpnp.fmod
33-
dpnp.abs
36+
dpnp.divmod
3437
dpnp.absolute
3538
dpnp.fabs
3639
dpnp.rint
3740
dpnp.sign
41+
dpnp.heaviside
42+
dpnp.conj
43+
dpnp.conjugate
3844
dpnp.exp
3945
dpnp.exp2
4046
dpnp.log
@@ -44,13 +50,24 @@ Math operations
4450
dpnp.log1p
4551
dpnp.proj
4652
dpnp.sqrt
47-
dpnp.cbrt
4853
dpnp.square
54+
dpnp.cbrt
4955
dpnp.reciprocal
5056
dpnp.rsqrt
5157
dpnp.gcd
5258
dpnp.lcm
5359

60+
.. tip::
61+
62+
The optional output arguments can be used to help you save memory
63+
for large calculations. If your arrays are large, complicated
64+
expressions can take longer than absolutely necessary due to the
65+
creation and (later) destruction of temporary calculation
66+
spaces. For example, the expression ``G = A * B + C`` is equivalent to
67+
``T1 = A * B; G = T1 + C; del T1``. It will be more quickly executed
68+
as ``G = A * B; add(G, C, G)`` which is the same as
69+
``G = A * B; G += C``.
70+
5471

5572
Trigonometric functions
5673
~~~~~~~~~~~~~~~~~~~~~~~

dpnp/backend/extensions/lapack/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ set(_module_src
3030
${CMAKE_CURRENT_SOURCE_DIR}/geqrf.cpp
3131
${CMAKE_CURRENT_SOURCE_DIR}/geqrf_batch.cpp
3232
${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp
33+
${CMAKE_CURRENT_SOURCE_DIR}/gesv_batch.cpp
3334
${CMAKE_CURRENT_SOURCE_DIR}/gesvd.cpp
3435
${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp
3536
${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp

dpnp/backend/extensions/lapack/common_helpers.hpp

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
//*****************************************************************************
2525

2626
#pragma once
27+
#include <pybind11/pybind11.h>
28+
#include <sycl/sycl.hpp>
29+
2730
#include <complex>
2831
#include <cstring>
29-
#include <pybind11/pybind11.h>
3032
#include <stdexcept>
3133

3234
namespace dpnp::extensions::lapack::helper
@@ -63,4 +65,89 @@ inline bool check_zeros_shape(int ndim, const py::ssize_t *shape)
6365
}
6466
return src_nelems == 0;
6567
}
68+
69+
// Allocate the memory for the pivot indices
70+
inline std::int64_t *alloc_ipiv(const std::int64_t n, sycl::queue &exec_q)
71+
{
72+
std::int64_t *ipiv = nullptr;
73+
74+
try {
75+
ipiv = sycl::malloc_device<std::int64_t>(n, exec_q);
76+
if (!ipiv) {
77+
throw std::runtime_error("Device allocation for ipiv failed");
78+
}
79+
} catch (sycl::exception const &e) {
80+
if (ipiv != nullptr)
81+
sycl::free(ipiv, exec_q);
82+
throw std::runtime_error(
83+
std::string(
84+
"Unexpected SYCL exception caught during ipiv allocation: ") +
85+
e.what());
86+
}
87+
88+
return ipiv;
89+
}
90+
91+
// Allocate the total memory for the total pivot indices with proper alignment
92+
// for batch implementations
93+
template <typename T>
94+
inline std::int64_t *alloc_ipiv_batch(const std::int64_t n,
95+
std::int64_t n_linear_streams,
96+
sycl::queue &exec_q)
97+
{
98+
// Get padding size to ensure memory allocations are aligned to 256 bytes
99+
// for better performance
100+
const std::int64_t padding = 256 / sizeof(T);
101+
102+
// Calculate the total size needed for the pivot indices array for all
103+
// linear streams with proper alignment
104+
size_t alloc_ipiv_size = round_up_mult(n_linear_streams * n, padding);
105+
106+
return alloc_ipiv(alloc_ipiv_size, exec_q);
107+
}
108+
109+
// Allocate the memory for the scratchpad
110+
template <typename T>
111+
inline T *alloc_scratchpad(std::int64_t scratchpad_size, sycl::queue &exec_q)
112+
{
113+
T *scratchpad = nullptr;
114+
115+
try {
116+
if (scratchpad_size > 0) {
117+
scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
118+
if (!scratchpad) {
119+
throw std::runtime_error(
120+
"Device allocation for scratchpad failed");
121+
}
122+
}
123+
} catch (sycl::exception const &e) {
124+
if (scratchpad != nullptr) {
125+
sycl::free(scratchpad, exec_q);
126+
}
127+
throw std::runtime_error(std::string("Unexpected SYCL exception caught "
128+
"during scratchpad allocation: ") +
129+
e.what());
130+
}
131+
132+
return scratchpad;
133+
}
134+
135+
// Allocate the total scratchpad memory with proper alignment for batch
136+
// implementations
137+
template <typename T>
138+
inline T *alloc_scratchpad_batch(std::int64_t scratchpad_size,
139+
std::int64_t n_linear_streams,
140+
sycl::queue &exec_q)
141+
{
142+
// Get padding size to ensure memory allocations are aligned to 256 bytes
143+
// for better performance
144+
const std::int64_t padding = 256 / sizeof(T);
145+
146+
// Calculate the total scratchpad memory size needed for all linear
147+
// streams with proper alignment
148+
const size_t alloc_scratch_size =
149+
round_up_mult(n_linear_streams * scratchpad_size, padding);
150+
151+
return alloc_scratchpad<T>(alloc_scratch_size, exec_q);
152+
}
66153
} // namespace dpnp::extensions::lapack::helper

dpnp/backend/extensions/lapack/evd_batch_common.hpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -119,34 +119,4 @@ std::pair<sycl::event, sycl::event>
119119

120120
return std::make_pair(ht_ev, evd_batch_ev);
121121
}
122-
123-
template <typename T>
124-
inline T *alloc_scratchpad(std::int64_t scratchpad_size,
125-
std::int64_t n_linear_streams,
126-
sycl::queue &exec_q)
127-
{
128-
// Get padding size to ensure memory allocations are aligned to 256 bytes
129-
// for better performance
130-
const std::int64_t padding = 256 / sizeof(T);
131-
132-
if (scratchpad_size <= 0) {
133-
throw std::runtime_error(
134-
"Invalid scratchpad size: must be greater than zero."
135-
" Calculated scratchpad size: " +
136-
std::to_string(scratchpad_size));
137-
}
138-
139-
// Calculate the total scratchpad memory size needed for all linear
140-
// streams with proper alignment
141-
const size_t alloc_scratch_size =
142-
helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
143-
144-
// Allocate memory for the total scratchpad
145-
T *scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
146-
if (!scratchpad) {
147-
throw std::runtime_error("Device allocation for scratchpad failed");
148-
}
149-
150-
return scratchpad;
151-
}
152122
} // namespace dpnp::extensions::lapack::evd

0 commit comments

Comments
 (0)