IntelPython · antonwolfy · Jun 12, 2023 · Jun 1, 2023 · Jun 6, 2023 · Jun 6, 2023
@@ -48,6 +48,7 @@ endfunction()
 build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp)
 add_subdirectory(backend)
 add_subdirectory(backend/extensions/lapack)
+add_subdirectory(backend/extensions/vm)
 
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)

@@ -0,0 +1,75 @@
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+set(python_module_name _vm_impl)
+pybind11_add_module(${python_module_name} MODULE
+    vm_py.cpp
+    div.cpp
+)
+
+if (WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
+        # this is a work-around for target_link_options inserting option after -link option, cause
+        # linker to ignore it.
+        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
+    endif()
+endif()
+
+set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
+
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
+
+if (WIN32)
+  target_compile_options(${python_module_name} PRIVATE
+    /clang:-fno-approx-func
+    /clang:-fno-finite-math-only
+    )
+else()
+  target_compile_options(${python_module_name} PRIVATE
+    -fno-approx-func
+    -fno-finite-math-only
+    )
+endif()
+
+target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
+if (UNIX)
+    # this option is support on Linux only
+    target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code)
+endif()
+
+if (DPNP_GENERATE_COVERAGE)
+    target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+endif()
+
+target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
+
+install(TARGETS ${python_module_name}
+  DESTINATION "dpnp/backend/extensions/vm"
+)
@@ -0,0 +1,307 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_utils.hpp"
+
+#include "div.hpp"
+#include "types_matrix.hpp"
+
+#include "dpnp_utils.hpp"
+
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+
+namespace mkl_vm = oneapi::mkl::vm;
+namespace py = pybind11;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*div_impl_fn_ptr_t)(
+    sycl::queue, const std::int64_t, const char*, const char*, char*, const std::vector<sycl::event>&);
+
+static div_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types];
+
+template <typename T>
+static sycl::event div_impl(sycl::queue exec_q,
+                            const std::int64_t n,
+                            const char* in_a,
+                            const char* in_b,
+                            char* out_y,
+                            const std::vector<sycl::event>& depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    const T* a = reinterpret_cast<const T*>(in_a);
+    const T* b = reinterpret_cast<const T*>(in_b);
+    T* y = reinterpret_cast<T*>(out_y);
+
+    return mkl_vm::div(exec_q,
+                       n, // number of elements to be calculated
+                       a, // pointer `a` containing 1st input vector of size n
+                       b, // pointer `b` containing 2nd input vector of size n
+                       y, // pointer `y` to the output vector of size n
+                       depends);
+}
+
+std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
+                                        dpctl::tensor::usm_ndarray src1,
+                                        dpctl::tensor::usm_ndarray src2,
+                                        dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise
+                                        const std::vector<sycl::event>& depends)
+{
+    // check type_nums
+    int src1_typenum = src1.get_typenum();
+    int src2_typenum = src2.get_typenum();
+    int dst_typenum = dst.get_typenum();
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
+    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
+    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
+
+    if (src1_typeid != src2_typeid || src2_typeid != dst_typeid)
+    {
+        throw py::value_error("Either any of input arrays or output array have different types");
+    }
+
+    // check that queues are compatible
+    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst}))
+    {
+        throw py::value_error("Execution queue is not compatible with allocation queues");
+    }
+
+    // check shapes, broadcasting is assumed done by caller
+    // check that dimensions are the same
+    int dst_nd = dst.get_ndim();
+    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim())
+    {
+        throw py::value_error("Array dimensions are not the same.");
+    }
+
+    // check that shapes are the same
+    const py::ssize_t* src1_shape = src1.get_shape_raw();
+    const py::ssize_t* src2_shape = src2.get_shape_raw();
+    const py::ssize_t* dst_shape = dst.get_shape_raw();
+    bool shapes_equal(true);
+    size_t src_nelems(1);
+
+    for (int i = 0; i < dst_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(src1_shape[i]);
+        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]);
+    }
+    if (!shapes_equal)
+    {
+        throw py::value_error("Array shapes are not the same.");
+    }
+
+    // if nelems is zero, return
+    if (src_nelems == 0)
+    {
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    // ensure that output is ample enough to accomodate all elements
+    auto dst_offsets = dst.get_minmax_offsets();
+    // destination must be ample enough to accomodate all elements
+    {
+        size_t range = static_cast<size_t>(dst_offsets.second - dst_offsets.first);
+        if (range + 1 < src_nelems)
+        {
+            throw py::value_error(
+                "Destination array can not accomodate all the "
+                "elements of source array.");
+        }
+    }
+
+    // check memory overlap
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(src1, dst) || overlap(src2, dst))
+    {
+        throw py::value_error("Arrays index overlapping segments of memory");
+    }
+
+    const char* src1_data = src1.get_data();
+    const char* src2_data = src2.get_data();
+    char* dst_data = dst.get_data();
+
+    // handle contiguous inputs
+    bool is_src1_c_contig = src1.is_c_contiguous();
+    bool is_src2_c_contig = src2.is_c_contiguous();
+    bool is_dst_c_contig = dst.is_c_contiguous();
+
+    bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
+    if (!all_c_contig)
+    {
+        throw py::value_error("Input and outpur arrays must be C-contiguous");
+    }
+
+    auto div_fn = div_dispatch_vector[dst_typeid];
+    if (div_fn == nullptr)
+    {
+        throw py::value_error("No div implementation defined");
+    }
+    sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
+
+    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
+    return std::make_pair(ht_ev, sum_ev);
+}
+
+bool can_call_div(sycl::queue exec_q,
+                  dpctl::tensor::usm_ndarray src1,
+                  dpctl::tensor::usm_ndarray src2,
+                  dpctl::tensor::usm_ndarray dst)
+{
+    // check type_nums
+    int src1_typenum = src1.get_typenum();
+    int src2_typenum = src2.get_typenum();
+    int dst_typenum = dst.get_typenum();
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
+    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
+    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
+
+    // types must be the same
+    if (src1_typeid != src2_typeid || src2_typeid != dst_typeid)
+    {
+        return false;
+    }
+
+    // OneMKL VM functions perform a copy on host if no double type support
+    if (!exec_q.get_device().has(sycl::aspect::fp64))
+    {
+        return false;
+    }
+
+    // check that queues are compatible
+    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst}))
+    {
+        return false;
+    }
+
+    // dimensions must be the same
+    int dst_nd = dst.get_ndim();
+    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim())
+    {
+        return false;
+    }
+
+    // shapes must be the same
+    const py::ssize_t* src1_shape = src1.get_shape_raw();
+    const py::ssize_t* src2_shape = src2.get_shape_raw();
+    const py::ssize_t* dst_shape = dst.get_shape_raw();
+    bool shapes_equal(true);
+    size_t src_nelems(1);
+
+    for (int i = 0; i < dst_nd; ++i)
+    {
+        src_nelems *= static_cast<size_t>(src1_shape[i]);
+        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]);
+    }
+    if (!shapes_equal)
+    {
+        return false;
+    }
+
+    // if nelems is zero, return false
+    if (src_nelems == 0)
+    {
+        return false;
+    }
+
+    // ensure that output is ample enough to accomodate all elements
+    auto dst_offsets = dst.get_minmax_offsets();
+    // destination must be ample enough to accomodate all elements
+    {
+        size_t range = static_cast<size_t>(dst_offsets.second - dst_offsets.first);
+        if (range + 1 < src_nelems)
+        {
+            return false;
+        }
+    }
+
+    // check memory overlap
+    auto const& overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(src1, dst) || overlap(src2, dst))
+    {
+        return false;
+    }
+
+    // suppport only contiguous inputs
+    bool is_src1_c_contig = src1.is_c_contiguous();
+    bool is_src2_c_contig = src2.is_c_contiguous();
+    bool is_dst_c_contig = dst.is_c_contiguous();
+
+    bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
+    if (!all_c_contig)
+    {
+        return false;
+    }
+
+    // MKL function is not defined for the type
+    if (div_dispatch_vector[src1_typeid] == nullptr)
+    {
+        return false;
+    }
+    return true;
+}
+
+template <typename fnT, typename T>
+struct DivContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::DivTypePairSupportFactory<T>::is_defined)
+        {
+            return div_impl<T>;
+        }
+        else
+        {
+            return nullptr;
+        }
+    }
+};
+
+void init_div_dispatch_vector(void)
+{
+    dpctl_td_ns::DispatchVectorBuilder<div_impl_fn_ptr_t, DivContigFactory, dpctl_td_ns::num_types> contig;
+    contig.populate_dispatch_vector(div_dispatch_vector);
+}
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp