IntelPython
diff --git a/‎.github/workflows/conda-package.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/conda-package.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/openssf-scorecard.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/openssf-scorecard.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/reference/ufunc.rst
Lines changed: 19 additions & 2 deletions b/‎doc/reference/ufunc.rst
Lines changed: 19 additions & 2 deletions
diff --git a/‎dpnp/backend/extensions/ufunc/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎dpnp/backend/extensions/ufunc/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
Lines changed: 2 additions & 0 deletions b/‎dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
Lines changed: 2 additions & 0 deletions
diff --git a/‎dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp
Lines changed: 100 additions & 0 deletions b/‎dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp
Lines changed: 100 additions & 0 deletions
diff --git a/‎dpnp/backend/extensions/ufunc/elementwise_functions/float_power.hpp
Lines changed: 35 additions & 0 deletions b/‎dpnp/backend/extensions/ufunc/elementwise_functions/float_power.hpp
Lines changed: 35 additions & 0 deletions
diff --git a/‎dpnp/dpnp_iface.py
Lines changed: 3 additions & 1 deletion b/‎dpnp/dpnp_iface.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎dpnp/dpnp_iface_counting.py
Lines changed: 29 additions & 14 deletions b/‎dpnp/dpnp_iface_counting.py
Lines changed: 29 additions & 14 deletions
diff --git a/‎dpnp/dpnp_iface_logic.py
Lines changed: 8 additions & 13 deletions b/‎dpnp/dpnp_iface_logic.py
Lines changed: 8 additions & 13 deletions
@@ -144,13 +144,13 @@ jobs:
         run: conda build --no-test --python ${{ matrix.python }} --numpy 1.24 ${{ env.CHANNELS }} conda-recipe
 
       - name: Upload artifact
-        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
+        uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5
         with:
           name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }}
           path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2
 
       - name: Upload wheels artifact
-        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
+        uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5
         with:
           name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Wheels Python ${{ matrix.python }}
           path: ${{ env.WHEELS_OUTPUT_FOLDER }}${{ env.PACKAGE_NAME }}-*.whl
 
@@ -60,7 +60,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
+        uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5
         with:
           name: SARIF file
           path: results.sarif
 
@@ -20,21 +20,27 @@ Math operations
    dpnp.add
    dpnp.subtract
    dpnp.multiply
+   dpnp.matmul
    dpnp.divide
    dpnp.logaddexp
    dpnp.logaddexp2
    dpnp.true_divide
    dpnp.floor_divide
    dpnp.negative
+   dpnp.positive
    dpnp.power
+   dpnp.float_power
    dpnp.remainder
    dpnp.mod
    dpnp.fmod
-   dpnp.abs
+   dpnp.divmod
    dpnp.absolute
    dpnp.fabs
    dpnp.rint
    dpnp.sign
+   dpnp.heaviside
+   dpnp.conj
+   dpnp.conjugate
    dpnp.exp
    dpnp.exp2
    dpnp.log
@@ -44,13 +50,24 @@ Math operations
    dpnp.log1p
    dpnp.proj
    dpnp.sqrt
-   dpnp.cbrt
    dpnp.square
+   dpnp.cbrt
    dpnp.reciprocal
    dpnp.rsqrt
    dpnp.gcd
    dpnp.lcm
 
+.. tip::
+
+   The optional output arguments can be used to help you save memory
+   for large calculations. If your arrays are large, complicated
+   expressions can take longer than absolutely necessary due to the
+   creation and (later) destruction of temporary calculation
+   spaces. For example, the expression ``G = A * B + C`` is equivalent to
+   ``T1 = A * B; G = T1 + C; del T1``. It will be more quickly executed
+   as ``G = A * B; add(G, C, G)`` which is the same as
+   ``G = A * B; G += C``.
+
 
 Trigonometric functions
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -27,6 +27,7 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/common.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/degrees.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/float_power.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmod.cpp
 
@@ -27,6 +27,7 @@
 
 #include "degrees.hpp"
 #include "fabs.hpp"
+#include "float_power.hpp"
 #include "fmax.hpp"
 #include "fmin.hpp"
 #include "fmod.hpp"
@@ -44,6 +45,7 @@ void init_elementwise_functions(py::module_ m)
 {
     init_degrees(m);
     init_fabs(m);
+    init_float_power(m);
     init_fmax(m);
     init_fmin(m);
     init_fmod(m);
 
@@ -0,0 +1,100 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <sycl/sycl.hpp>
+
+#include "dpctl4pybind11.hpp"
+
+#include "float_power.hpp"
+
+// include a local copy of elementwise common header from dpctl tensor:
+// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
+// TODO: replace by including dpctl header once available
+#include "../../elementwise_functions/elementwise_functions.hpp"
+
+// dpctl tensor headers
+#include "utils/type_dispatch.hpp"
+
+namespace dpnp::extensions::ufunc
+{
+namespace py = pybind11;
+namespace py_int = dpnp::extensions::py_internal;
+
+namespace impl
+{
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+// Supports only float and complex types
+template <typename T1, typename T2>
+struct OutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+static int float_power_output_typeid_table[td_ns::num_types][td_ns::num_types];
+
+template <typename fnT, typename T1, typename T2>
+struct TypeMapFactory
+{
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename OutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+void populate_float_power_dispatch_tables(void)
+{
+    td_ns::DispatchTableBuilder<int, TypeMapFactory, td_ns::num_types> dvb;
+    dvb.populate_dispatch_table(float_power_output_typeid_table);
+}
+} // namespace impl
+
+void init_float_power(py::module_ m)
+{
+    impl::populate_float_power_dispatch_tables();
+    using impl::float_power_output_typeid_table;
+
+    auto float_power_result_type_pyapi = [&](const py::dtype &dtype1,
+                                             const py::dtype &dtype2) {
+        return py_int::py_binary_ufunc_result_type(
+            dtype1, dtype2, float_power_output_typeid_table);
+    };
+    m.def("_float_power_result_type", float_power_result_type_pyapi);
+}
+} // namespace dpnp::extensions::ufunc
@@ -0,0 +1,35 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpnp::extensions::ufunc
+{
+void init_float_power(py::module_ m);
+} // namespace dpnp::extensions::ufunc
@@ -654,7 +654,7 @@ def get_result_array(a, out=None, casting="safe"):
 
     Parameters
     ----------
-    a : {dpnp_array}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     out : {dpnp.ndarray, usm_ndarray}
         If provided, value of `a` array will be copied into it
@@ -671,6 +671,8 @@ def get_result_array(a, out=None, casting="safe"):
     """
 
     if out is None:
+        if isinstance(a, dpt.usm_ndarray):
+            return dpnp_array._create_from_usm_ndarray(a)
         return a
 
     if isinstance(out, dpt.usm_ndarray):
 
@@ -44,25 +44,38 @@
 __all__ = ["count_nonzero"]
 
 
-def count_nonzero(a, axis=None, *, keepdims=False):
+def count_nonzero(a, axis=None, *, keepdims=False, out=None):
     """
     Counts the number of non-zero values in the array `a`.
 
     For full documentation refer to :obj:`numpy.count_nonzero`.
 
+    Parameters
+    ----------
+    a : {dpnp.ndarray, usm_ndarray}
+        The array for which to count non-zeros.
+    axis : {None, int, tuple}, optional
+        Axis or tuple of axes along which to count non-zeros.
+        Default value means that non-zeros will be counted along a flattened
+        version of `a`.
+        Default: ``None``.
+    keepdims : bool, optional
+        If this is set to ``True``, the axes that are counted are left in the
+        result as dimensions with size one. With this option, the result will
+        broadcast correctly against the input array.
+        Default: ``False``.
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        The array into which the result is written. `out` must have the
+        expected shape and the expected data type of the result.
+        If ``None`` then a new array is returned.
+        Default: ``None``.
+
     Returns
     -------
     out : dpnp.ndarray
         Number of non-zero values in the array along a given axis.
-        Otherwise, a zero-dimensional array with the total number of
-        non-zero values in the array is returned.
-
-    Limitations
-    -----------
-    Parameters `a` is supported as either :class:`dpnp.ndarray`
-    or :class:`dpctl.tensor.usm_ndarray`.
-    Otherwise ``TypeError`` exception will be raised.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+        If `axis` is ``None``, a zero-dimensional array with the total number
+        of non-zero values in the array is returned.
 
     See Also
     --------
@@ -87,8 +100,10 @@ def count_nonzero(a, axis=None, *, keepdims=False):
 
     """
 
-    # TODO: might be improved by implementing an extension
-    # with `count_nonzero` kernel
     usm_a = dpnp.get_usm_ndarray(a)
-    usm_a = dpt.astype(usm_a, dpnp.bool, copy=False)
-    return dpnp.sum(usm_a, axis=axis, dtype=dpnp.intp, keepdims=keepdims)
+    usm_out = None if out is None else dpnp.get_usm_ndarray(out)
+
+    usm_res = dpt.count_nonzero(
+        usm_a, axis=axis, keepdims=keepdims, out=usm_out
+    )
+    return dpnp.get_result_array(usm_res, out)
@@ -51,7 +51,6 @@
 
 import dpnp
 from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc
-from dpnp.dpnp_array import dpnp_array
 
 __all__ = [
     "all",
@@ -167,13 +166,11 @@ def all(a, /, axis=None, out=None, keepdims=False, *, where=True):
 
     dpnp.check_limitations(where=where)
 
-    dpt_array = dpnp.get_usm_ndarray(a)
-    result = dpnp_array._create_from_usm_ndarray(
-        dpt.all(dpt_array, axis=axis, keepdims=keepdims)
-    )
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.all(usm_a, axis=axis, keepdims=keepdims)
+
     # TODO: temporary solution until dpt.all supports out parameter
-    result = dpnp.get_result_array(result, out)
-    return result
+    return dpnp.get_result_array(usm_res, out)
 
 
 def allclose(a, b, rtol=1.0e-5, atol=1.0e-8, equal_nan=False):
@@ -333,13 +330,11 @@ def any(a, /, axis=None, out=None, keepdims=False, *, where=True):
 
     dpnp.check_limitations(where=where)
 
-    dpt_array = dpnp.get_usm_ndarray(a)
-    result = dpnp_array._create_from_usm_ndarray(
-        dpt.any(dpt_array, axis=axis, keepdims=keepdims)
-    )
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.any(usm_a, axis=axis, keepdims=keepdims)
+
     # TODO: temporary solution until dpt.any supports out parameter
-    result = dpnp.get_result_array(result, out)
-    return result
+    return dpnp.get_result_array(usm_res, out)
 
 
 _EQUAL_DOCSTRING = """