Skip to content

Commit 8b8c4a3

Browse files
authored
Merge branch 'master' into build_target_cuda
2 parents 632d164 + 1e5ba88 commit 8b8c4a3

File tree

13 files changed

+675
-258
lines changed

13 files changed

+675
-258
lines changed

dpnp/backend/extensions/lapack/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ set(_module_src
3030
${CMAKE_CURRENT_SOURCE_DIR}/geqrf.cpp
3131
${CMAKE_CURRENT_SOURCE_DIR}/geqrf_batch.cpp
3232
${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp
33+
${CMAKE_CURRENT_SOURCE_DIR}/gesv_batch.cpp
3334
${CMAKE_CURRENT_SOURCE_DIR}/gesvd.cpp
3435
${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp
3536
${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp

dpnp/backend/extensions/lapack/common_helpers.hpp

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
//*****************************************************************************
2525

2626
#pragma once
27+
#include <pybind11/pybind11.h>
28+
#include <sycl/sycl.hpp>
29+
2730
#include <complex>
2831
#include <cstring>
29-
#include <pybind11/pybind11.h>
3032
#include <stdexcept>
3133

3234
namespace dpnp::extensions::lapack::helper
@@ -63,4 +65,89 @@ inline bool check_zeros_shape(int ndim, const py::ssize_t *shape)
6365
}
6466
return src_nelems == 0;
6567
}
68+
69+
// Allocate the memory for the pivot indices
70+
inline std::int64_t *alloc_ipiv(const std::int64_t n, sycl::queue &exec_q)
71+
{
72+
std::int64_t *ipiv = nullptr;
73+
74+
try {
75+
ipiv = sycl::malloc_device<std::int64_t>(n, exec_q);
76+
if (!ipiv) {
77+
throw std::runtime_error("Device allocation for ipiv failed");
78+
}
79+
} catch (sycl::exception const &e) {
80+
if (ipiv != nullptr)
81+
sycl::free(ipiv, exec_q);
82+
throw std::runtime_error(
83+
std::string(
84+
"Unexpected SYCL exception caught during ipiv allocation: ") +
85+
e.what());
86+
}
87+
88+
return ipiv;
89+
}
90+
91+
// Allocate the total memory for the total pivot indices with proper alignment
92+
// for batch implementations
93+
template <typename T>
94+
inline std::int64_t *alloc_ipiv_batch(const std::int64_t n,
95+
std::int64_t n_linear_streams,
96+
sycl::queue &exec_q)
97+
{
98+
// Get padding size to ensure memory allocations are aligned to 256 bytes
99+
// for better performance
100+
const std::int64_t padding = 256 / sizeof(T);
101+
102+
// Calculate the total size needed for the pivot indices array for all
103+
// linear streams with proper alignment
104+
size_t alloc_ipiv_size = round_up_mult(n_linear_streams * n, padding);
105+
106+
return alloc_ipiv(alloc_ipiv_size, exec_q);
107+
}
108+
109+
// Allocate the memory for the scratchpad
110+
template <typename T>
111+
inline T *alloc_scratchpad(std::int64_t scratchpad_size, sycl::queue &exec_q)
112+
{
113+
T *scratchpad = nullptr;
114+
115+
try {
116+
if (scratchpad_size > 0) {
117+
scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
118+
if (!scratchpad) {
119+
throw std::runtime_error(
120+
"Device allocation for scratchpad failed");
121+
}
122+
}
123+
} catch (sycl::exception const &e) {
124+
if (scratchpad != nullptr) {
125+
sycl::free(scratchpad, exec_q);
126+
}
127+
throw std::runtime_error(std::string("Unexpected SYCL exception caught "
128+
"during scratchpad allocation: ") +
129+
e.what());
130+
}
131+
132+
return scratchpad;
133+
}
134+
135+
// Allocate the total scratchpad memory with proper alignment for batch
136+
// implementations
137+
template <typename T>
138+
inline T *alloc_scratchpad_batch(std::int64_t scratchpad_size,
139+
std::int64_t n_linear_streams,
140+
sycl::queue &exec_q)
141+
{
142+
// Get padding size to ensure memory allocations are aligned to 256 bytes
143+
// for better performance
144+
const std::int64_t padding = 256 / sizeof(T);
145+
146+
// Calculate the total scratchpad memory size needed for all linear
147+
// streams with proper alignment
148+
const size_t alloc_scratch_size =
149+
round_up_mult(n_linear_streams * scratchpad_size, padding);
150+
151+
return alloc_scratchpad<T>(alloc_scratch_size, exec_q);
152+
}
66153
} // namespace dpnp::extensions::lapack::helper

dpnp/backend/extensions/lapack/evd_batch_common.hpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -119,34 +119,4 @@ std::pair<sycl::event, sycl::event>
119119

120120
return std::make_pair(ht_ev, evd_batch_ev);
121121
}
122-
123-
template <typename T>
124-
inline T *alloc_scratchpad(std::int64_t scratchpad_size,
125-
std::int64_t n_linear_streams,
126-
sycl::queue &exec_q)
127-
{
128-
// Get padding size to ensure memory allocations are aligned to 256 bytes
129-
// for better performance
130-
const std::int64_t padding = 256 / sizeof(T);
131-
132-
if (scratchpad_size <= 0) {
133-
throw std::runtime_error(
134-
"Invalid scratchpad size: must be greater than zero."
135-
" Calculated scratchpad size: " +
136-
std::to_string(scratchpad_size));
137-
}
138-
139-
// Calculate the total scratchpad memory size needed for all linear
140-
// streams with proper alignment
141-
const size_t alloc_scratch_size =
142-
helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
143-
144-
// Allocate memory for the total scratchpad
145-
T *scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
146-
if (!scratchpad) {
147-
throw std::runtime_error("Device allocation for scratchpad failed");
148-
}
149-
150-
return scratchpad;
151-
}
152122
} // namespace dpnp::extensions::lapack::evd

0 commit comments

Comments
 (0)