Implement syevd_batch via syevd call

vlad-perevezentsev · vlad-perevezentsev · commit 19d0540f29b3 · 2024-06-03T16:10:22.000+02:00
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -41,6 +41,7 @@ set(_module_src
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/syevd_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp
 )
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -60,6 +60,7 @@ void init_dispatch_vectors(void)
     lapack_ext::init_orgqr_dispatch_vector();
     lapack_ext::init_potrf_batch_dispatch_vector();
     lapack_ext::init_potrf_dispatch_vector();
+    lapack_ext::init_syevd_batch_dispatch_vector();
     lapack_ext::init_syevd_dispatch_vector();
     lapack_ext::init_ungqr_batch_dispatch_vector();
     lapack_ext::init_ungqr_dispatch_vector();
@@ -183,6 +184,14 @@ PYBIND11_MODULE(_lapack_impl, m)
           py::arg("eig_vecs"), py::arg("eig_vals"),
           py::arg("depends") = py::list());
 
+    m.def("_syevd_batch", &lapack_ext::syevd_batch,
+          "Call `syevd` from OneMKL LAPACK library in a loop to return "
+          "the eigenvalues and eigenvectors of a batch of real symmetric "
+          "matrices",
+          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
+
     m.def("_ungqr_batch", &lapack_ext::ungqr_batch,
           "Call `_ungqr_batch` from OneMKL LAPACK library to return "
           "the complex unitary matrices matrix Qi of the QR factorization "
diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp
@@ -46,7 +46,16 @@ extern std::pair<sycl::event, sycl::event>
           dpctl::tensor::usm_ndarray eig_vals,
           const std::vector<sycl::event> &depends = {});
 
+extern std::pair<sycl::event, sycl::event> // both slots: args keep-alive event
+    syevd_batch(sycl::queue exec_q,
+                const std::int8_t jobz, // cast to oneapi::mkl::job by the definition
+                const std::int8_t upper_lower, // cast to oneapi::mkl::uplo
+                dpctl::tensor::usm_ndarray eig_vecs, // (batch, n, n), C-contiguous
+                dpctl::tensor::usm_ndarray eig_vals, // (batch, n), C-contiguous
+                const std::vector<sycl::event> &depends = {});
+
 extern void init_syevd_dispatch_vector(void);
+extern void init_syevd_batch_dispatch_vector(void); // fills syevd_batch's per-type table
 } // namespace lapack
 } // namespace ext
 } // namespace backend
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -0,0 +1,278 @@
+//*****************************************************************************
+// Copyright (c) 2023-2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_utils.hpp"
+// #include "copy_and_cast_usm_to_usm.hpp"
+
+#include "syevd.hpp"
+#include "types_matrix.hpp"
+
+#include "dpnp_utils.hpp"
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace py = pybind11;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*syevd_batch_impl_fn_ptr_t)( // signature of syevd_batch_impl<T>
+    sycl::queue,                  // execution queue
+    const oneapi::mkl::job,       // jobz
+    const oneapi::mkl::uplo,      // upper_lower
+    const std::int64_t,           // n, order of one matrix
+    char *,                       // type-erased pointer to one n x n matrix
+    char *,                       // type-erased pointer to its n eigenvalues
+    std::vector<sycl::event> &,   // receives the scratchpad clean-up host task
+    const std::vector<sycl::event> &); // events the syevd call must wait for
+
+static syevd_batch_impl_fn_ptr_t // indexed by dpctl type lookup id; nullptr = unsupported
+    syevd_batch_dispatch_vector[dpctl_td_ns::num_types];
+
+template <typename T> // T: element type of the matrix and eigenvalue buffers
+static sycl::event syevd_batch_impl(sycl::queue exec_q,
+                                    const oneapi::mkl::job jobz,
+                                    const oneapi::mkl::uplo upper_lower,
+                                    const std::int64_t n,
+                                    char *in_a,
+                                    char *out_w,
+                                    std::vector<sycl::event> &host_task_events,
+                                    const std::vector<sycl::event> &depends)
+{ // Submits oneMKL syevd for ONE n x n matrix and returns the syevd event.
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    T *a = reinterpret_cast<T *>(in_a);  // matrix on entry, overwritten by syevd
+    T *w = reinterpret_cast<T *>(out_w); // receives the eigenvalues
+
+    const std::int64_t lda = std::max<size_t>(1UL, n);
+    const std::int64_t scratchpad_size =
+        mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
+    T *scratchpad = nullptr; // allocated per call, freed by the host task below
+
+    std::stringstream error_msg;
+    std::int64_t info = 0; // non-zero once any exception has been caught
+
+    sycl::event syevd_event;
+    try {
+        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+
+        syevd_event = mkl_lapack::syevd(
+            exec_q,
+            jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are
+                  // computed.
+            upper_lower, // 'upper_lower == uplo::upper' means the upper
+                         // triangular part of A, or the lower triangular
+                         // otherwise
+            n,           // The order of the matrix A (0 <= n)
+            a, // Pointer to A, size (lda, *), where the 2nd dimension must be
+               // at least max(1, n). If 'jobz == job::vec', on exit it will
+               // contain the eigenvectors of A
+            lda, // The leading dimension of a, must be at least max(1, n)
+            w,   // Pointer to array of size at least n, it will contain the
+                 // eigenvalues of A in ascending order
+            scratchpad, // Pointer to scratchpad memory to be used by MKL
+                        // routine for storing intermediate results
+            scratchpad_size, depends);
+    } catch (mkl_lapack::exception const &e) {
+        error_msg
+            << "Unexpected MKL exception caught during syevd() call:\nreason: "
+            << e.what() << "\ninfo: " << e.info();
+        info = e.info();
+    } catch (sycl::exception const &e) {
+        error_msg << "Unexpected SYCL exception caught during syevd() call:\n"
+                  << e.what();
+        info = -1; // sentinel: a SYCL exception carries no LAPACK info code
+    }
+
+    if (info != 0) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr) {
+            sycl::free(scratchpad, exec_q); // release before rethrowing
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(syevd_event); // free only after syevd has finished
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+    host_task_events.push_back(clean_up_event); // caller waits on these
+    return syevd_event;
+}
+
+std::pair<sycl::event, sycl::event> // both slots hold the args keep-alive event
+    syevd_batch(sycl::queue exec_q,
+                const std::int8_t jobz,
+                const std::int8_t upper_lower,
+                dpctl::tensor::usm_ndarray eig_vecs,
+                dpctl::tensor::usm_ndarray eig_vals,
+                const std::vector<sycl::event> &depends)
+{ // Validates the arrays, then calls the per-type syevd once per matrix.
+    const int eig_vecs_nd = eig_vecs.get_ndim();
+    const int eig_vals_nd = eig_vals.get_ndim();
+
+    if (eig_vecs_nd != 3) { // expected (batch_size, n, n)
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
+                              " of an output array with eigenvectors");
+    }
+    else if (eig_vals_nd != 2) { // expected (batch_size, n)
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
+                              " of an output array with eigenvalues");
+    }
+
+    const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
+    const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
+
+    if (eig_vecs_shape[1] != eig_vecs_shape[2]) {
+        throw py::value_error(
+            "The last two dimensions of 'eig_vecs' must be the same.");
+    }
+    else if (eig_vecs_shape[0] != eig_vals_shape[0] ||
+             eig_vecs_shape[1] != eig_vals_shape[1])
+    {
+        throw py::value_error(
+            "The shape of 'eig_vals' must be (batch_size, n), "
+            "where batch_size = " +
+            std::to_string(eig_vecs_shape[0]) +
+            " and n = " + std::to_string(eig_vecs_shape[1]));
+    }
+
+    size_t src_nelems(1); // total element count of eig_vecs
+
+    for (int i = 0; i < eig_vecs_nd; ++i) {
+        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
+    }
+
+    if (src_nelems == 0) {
+        // nothing to do
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) {
+        throw py::value_error(
+            "Execution queue is not compatible with allocation queues");
+    }
+
+    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals)) {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are "
+                              "overlapping segments of memory");
+    }
+
+    bool is_eig_vecs_c_contig = eig_vecs.is_c_contiguous();
+    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    if (!is_eig_vecs_c_contig) { // per-matrix offsets below assume dense layout
+        throw py::value_error(
+            "An array with input matrix / output eigenvectors "
+            "must be C-contiguous");
+    }
+    else if (!is_eig_vals_c_contig) {
+        throw py::value_error(
+            "An array with output eigenvalues must be C-contiguous");
+    }
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int eig_vecs_type_id =
+        array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
+    int eig_vals_type_id =
+        array_types.typenum_to_lookup_id(eig_vals.get_typenum());
+
+    if (eig_vecs_type_id != eig_vals_type_id) {
+        throw py::value_error(
+            "Types of eigenvectors and eigenvalues are mismatched");
+    }
+
+    syevd_batch_impl_fn_ptr_t syevd_batch_fn =
+        syevd_batch_dispatch_vector[eig_vecs_type_id];
+    if (syevd_batch_fn == nullptr) { // element type has no syevd implementation
+        throw py::value_error("No syevd implementation defined for a type of "
+                              "eigenvectors and eigenvalues");
+    }
+
+    char *eig_vecs_data = eig_vecs.get_data();
+    char *eig_vals_data = eig_vals.get_data();
+
+    const std::int64_t batch_size = eig_vecs_shape[0];
+    const std::int64_t n = eig_vecs_shape[1];
+    int elemsize = eig_vecs.get_elemsize(); // bytes per element, for char* offsets
+
+    const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
+    const oneapi::mkl::uplo uplo_val =
+        static_cast<oneapi::mkl::uplo>(upper_lower);
+
+    std::vector<sycl::event> host_task_events; // one clean-up task per matrix
+    host_task_events.reserve(static_cast<size_t>(batch_size));
+
+    for (std::int64_t i = 0; i < batch_size; ++i) {
+        char *eig_vecs_batch = eig_vecs_data + i * n * n * elemsize;
+        char *eig_vals_batch = eig_vals_data + i * n * elemsize;
+
+        syevd_batch_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_batch, // result unused:
+                       eig_vals_batch, host_task_events, depends); // clean-up task waits on it
+    }
+
+    sycl::event args_ev = dpctl::utils::keep_args_alive(
+        exec_q, {eig_vecs, eig_vals}, host_task_events);
+
+    return std::make_pair(args_ev, args_ev);
+}
+
+template <typename fnT, typename T> // fnT: function-pointer type; T: element type
+struct SyevdBatchContigFactory
+{
+    fnT get() // returns the implementation for T, or nullptr if T is unsupported
+    {
+        if constexpr (types::SyevdBatchTypePairSupportFactory<T>::is_defined) {
+            return syevd_batch_impl<T>;
+        }
+        else {
+            return nullptr; // syevd_batch turns a null entry into a value_error
+        }
+    }
+};
+
+void init_syevd_batch_dispatch_vector(void) // called from init_dispatch_vectors()
+{
+    dpctl_td_ns::DispatchVectorBuilder<syevd_batch_impl_fn_ptr_t,
+                                       SyevdBatchContigFactory,
+                                       dpctl_td_ns::num_types>
+        contig;
+    contig.populate_dispatch_vector(syevd_batch_dispatch_vector); // one slot per type id
+}
+} // namespace lapack
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -383,6 +383,24 @@ struct SyevdTypePairSupportFactory
         dpctl_td_ns::NotDefinedEntry>::is_defined;
 };
 
+/**
+ * @brief A factory to define pairs of supported types for which
+ * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd<T>
+ * function, as used by the looped `syevd_batch` extension.
+ *
+ * @tparam T Type of the array containing input matrix A and of the output
+ * arrays with eigenvalues and eigenvectors.
+ */
+template <typename T>
+struct SyevdBatchTypePairSupportFactory
+{
+    static constexpr bool is_defined = std::disjunction<
+        dpctl_td_ns::TypePairDefinedEntry<T, double, T, double>,
+        dpctl_td_ns::TypePairDefinedEntry<T, float, T, float>,
+        // fall-through
+        dpctl_td_ns::NotDefinedEntry>::is_defined;
+};
+
 /**
  * @brief A factory to define pairs of supported types for which
  * MKL LAPACK library provides support in oneapi::mkl::lapack::ungqr_batch<T>
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py

Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,7 @@ set(_module_src`
`41`	`41`	`${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp`
`42`	`42`	`${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp`
`43`	`43`	`${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp`
	`44`	`+ ${CMAKE_CURRENT_SOURCE_DIR}/syevd_batch.cpp`
`44`	`45`	`${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp`
`45`	`46`	`${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp`
`46`	`47`	`)`