Merge pull request #1328 from IntelPython/fix-some-array-api-test-cases

oleksandr-pavlyk · web-flow · commit df8eb5fa914b · 2023-08-08T05:13:03.000-05:00
diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py
@@ -290,78 +290,6 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src):
     _copy_same_shape(dst, src_same_shape)
 
 
-def copy(usm_ary, order="K"):
-    """copy(ary, order="K")
-
-    Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`.
-
-    Args:
-        ary (usm_ndarray):
-            Input array.
-        order ({"C", "F", "A", "K"}, optional):
-            Controls the memory layout of the output array.
-    Returns:
-        usm_ndarray:
-            A copy of the input array.
-
-    Memory layout of the copy is controlled by `order` keyword,
-    following NumPy's conventions. The `order` keywords can be
-    one of the following:
-
-       - "C": C-contiguous memory layout
-       - "F": Fortran-contiguous memory layout
-       - "A": Fortran-contiguous if the input array is also Fortran-contiguous,
-         otherwise C-contiguous
-       - "K": match the layout of `usm_ary` as closely as possible.
-
-    """
-    if not isinstance(usm_ary, dpt.usm_ndarray):
-        return TypeError(
-            f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}"
-        )
-    copy_order = "C"
-    if order == "C":
-        pass
-    elif order == "F":
-        copy_order = order
-    elif order == "A":
-        if usm_ary.flags.f_contiguous:
-            copy_order = "F"
-    elif order == "K":
-        if usm_ary.flags.f_contiguous:
-            copy_order = "F"
-    else:
-        raise ValueError(
-            "Unrecognized value of the order keyword. "
-            "Recognized values are 'A', 'C', 'F', or 'K'"
-        )
-    c_contig = usm_ary.flags.c_contiguous
-    f_contig = usm_ary.flags.f_contiguous
-    R = dpt.usm_ndarray(
-        usm_ary.shape,
-        dtype=usm_ary.dtype,
-        buffer=usm_ary.usm_type,
-        order=copy_order,
-        buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
-    )
-    if order == "K" and (not c_contig and not f_contig):
-        original_strides = usm_ary.strides
-        ind = sorted(
-            range(usm_ary.ndim),
-            key=lambda i: abs(original_strides[i]),
-            reverse=True,
-        )
-        new_strides = tuple(R.strides[ind[i]] for i in ind)
-        R = dpt.usm_ndarray(
-            usm_ary.shape,
-            dtype=usm_ary.dtype,
-            buffer=R.usm_data,
-            strides=new_strides,
-        )
-    _copy_same_shape(R, usm_ary)
-    return R
-
-
 def _empty_like_orderK(X, dt, usm_type=None, dev=None):
     """Returns empty array like `x`, using order='K'
 
@@ -452,6 +380,65 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
     return dpt.permute_dims(R, inv_perm)
 
 
+def copy(usm_ary, order="K"):
+    """copy(usm_ary, order="K")
+
+    Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`.
+
+    Args:
+        usm_ary (usm_ndarray):
+            Input array.
+        order ({"C", "F", "A", "K"}, optional):
+            Controls the memory layout of the output array.
+    Returns:
+        usm_ndarray:
+            A copy of the input array.
+    Raises:
+        TypeError: if `usm_ary` is not a :class:`dpctl.tensor.usm_ndarray`.
+        ValueError: if `order` is not one of "C", "F", "A", or "K".
+
+    Memory layout of the copy is controlled by `order` keyword,
+    following NumPy's conventions. The `order` keywords can be
+    one of the following:
+
+       - "C": C-contiguous memory layout
+       - "F": Fortran-contiguous memory layout
+       - "A": Fortran-contiguous if the input array is also Fortran-contiguous,
+         otherwise C-contiguous
+       - "K": match the layout of `usm_ary` as closely as possible.
+
+    """
+    if not isinstance(usm_ary, dpt.usm_ndarray):
+        # raise (not return) so that callers never receive an exception object
+        raise TypeError(
+            f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}"
+        )
+    if order not in ("C", "F", "A", "K"):
+        raise ValueError(
+            "Unrecognized value of the order keyword. "
+            "Recognized values are 'A', 'C', 'F', or 'K'"
+        )
+    if order == "K":
+        # layout-matching allocation is delegated to _empty_like_orderK
+        R = _empty_like_orderK(usm_ary, usm_ary.dtype)
+    else:
+        # "A" resolves to "F" only for an F-contiguous input, else "C"
+        if order == "A":
+            copy_order = "F" if usm_ary.flags.f_contiguous else "C"
+        else:
+            copy_order = order
+        R = dpt.usm_ndarray(
+            usm_ary.shape,
+            dtype=usm_ary.dtype,
+            buffer=usm_ary.usm_type,
+            order=copy_order,
+            buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
+        )
+    # fill the freshly allocated array with the contents of the input
+    _copy_same_shape(R, usm_ary)
+    return R
+
+
 def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
     """ astype(array, new_dtype, order="K", casting="unsafe", \
             copy=True)
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
@@ -28,6 +28,7 @@
 #include <CL/sycl.hpp>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <type_traits>
 
 #include "utils/offset_utils.hpp"
@@ -55,16 +56,12 @@ using dpctl::tensor::type_utils::vec_cast;
 
 template <typename argT1, typename argT2, typename resT> struct LogAddExpFunctor
 {
-    using supports_sg_loadstore = typename std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
-    using supports_vec = typename std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_sg_loadstore = std::true_type;
+    using supports_vec = std::true_type;
 
     resT operator()(const argT1 &in1, const argT2 &in2)
     {
-        resT max = std::max<resT>(in1, in2);
-        resT min = std::min<resT>(in1, in2);
-        return max + std::log1p(std::exp(min - max));
+        return impl<resT>(in1, in2);
     }
 
     template <int vec_sz>
@@ -76,12 +73,29 @@ template <typename argT1, typename argT2, typename resT> struct LogAddExpFunctor
 
 #pragma unroll
         for (int i = 0; i < vec_sz; ++i) {
-            resT max = std::max<resT>(in1[i], in2[i]);
-            res[i] = max + std::log1p(std::exp(std::abs(diff[i])));
+            res[i] = impl<resT>(in1[i], in2[i]);
         }
 
         return res;
     }
+
+private:
+    // Computes log(exp(in1) + exp(in2)); NaN in either argument gives NaN.
+    template <typename T> T impl(T const &in1, T const &in2)
+    {
+        if (std::isnan(in1) || std::isnan(in2)) {
+            // std::max/std::min return in1 when in2 is NaN: test both args
+            return std::numeric_limits<T>::quiet_NaN();
+        }
+        T const max = std::max<T>(in1, in2);
+        if (std::isinf(max)) {
+            // max is +inf, or both args are -inf: the result equals max;
+            // returning early avoids NaN from computing inf - inf below
+            return max;
+        }
+        T const min = std::min<T>(in1, in2);
+        return max + std::log1p(std::exp(min - max));
+    }
 };
 
 template <typename argT1,