address comments

vtavana · vtavana · commit 63aab701d1dd · 2024-02-06T10:00:50.000-06:00
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
@@ -43,11 +43,7 @@
 import dpnp
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
-from dpnp.dpnp_utils.dpnp_utils_linearalgebra import (
-    dpnp_dot,
-    dpnp_matmul,
-    dpnp_vdot,
-)
+from dpnp.dpnp_utils.dpnp_utils_linearalgebra import dpnp_dot, dpnp_matmul
 
 __all__ = [
     "dot",
@@ -456,11 +452,13 @@ def vdot(a, b):
 
     Parameters
     ----------
-    a : {dpnp_array, usm_ndarray}
-        First input array. If `a` is complex the complex conjugate
-        is taken before the calculation of the dot product.
+    a : {dpnp_array, usm_ndarray, scalar}
+        First input array. Both inputs `a` and `b` cannot be
+        scalars at the same time. If `a` is complex, the complex
+        conjugate is taken before the calculation of the dot product.
     b : {dpnp_array, usm_ndarray, scalar}
-        Second input array.
+        Second input array. Both inputs `a` and `b` cannot be
+        scalars at the same time.
 
     Returns
     -------
@@ -494,17 +492,22 @@ def vdot(a, b):
 
     """
 
-    dpnp.check_supported_arrays_type(a)
+    dpnp.check_supported_arrays_type(a, scalar_type=True)
     dpnp.check_supported_arrays_type(b, scalar_type=True)
 
-    if dpnp.isscalar(b):
-        if a.size != 1:
+    if dpnp.isscalar(a) or dpnp.isscalar(b):
+        if dpnp.isscalar(b) and a.size != 1:
             raise ValueError("The first array should be of size one.")
+        if dpnp.isscalar(a) and b.size != 1:
+            raise ValueError("The second array should be of size one.")
+        # numpy.conj is only meant for the scalar case; an array input
+        # is conjugated with dpnp.conj instead of a NumPy function
+        a_conj = numpy.conj(a) if dpnp.isscalar(a) else dpnp.conj(a)
         # TODO: investigate usage of axpy (axpy_batch) or scal
         # functions from BLAS here instead of dpnp.multiply
-        return dpnp.multiply(dpnp.conj(a), b)
+        return dpnp.multiply(a_conj, b)
     elif a.ndim == 1 and b.ndim == 1:
-        return dpnp_vdot(a, b)
+        return dpnp_dot(a, b, out=None, conjugate=True)
     else:
         # dot product of flatten arrays
-        return dpnp_vdot(dpnp.ravel(a), dpnp.ravel(b))
+        return dpnp_dot(dpnp.ravel(a), dpnp.ravel(b), out=None, conjugate=True)
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -33,7 +33,7 @@
 from dpnp.dpnp_array import dpnp_array
 from dpnp.dpnp_utils import get_usm_allocations
 
-__all__ = ["dpnp_dot", "dpnp_matmul", "dpnp_vdot"]
+__all__ = ["dpnp_dot", "dpnp_matmul"]
 
 
 def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None):
@@ -185,7 +185,7 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue):
     return op_dtype, res_dtype
 
 
-def dpnp_dot(a, b, /, out=None):
+def dpnp_dot(a, b, /, out=None, *, conjugate=False):
     """
     Return the dot product of two arrays.
 
@@ -194,7 +194,9 @@ def dpnp_dot(a, b, /, out=None):
     `dpctl.tensor.vecdot` form the Data Parallel Control library is used,
     2) For real-valued floating point data types, `dot` routines from
     BLAS library of OneMKL are used, and 3) For complex data types,
-    `dotu` routines from BLAS library of OneMKL are used.
+    `dotu` or `dotc` routines from BLAS library of OneMKL are used.
+    If `conjugate` is ``False``, `dotu` is used. Otherwise, `dotc` is used,
+    for which the first array is conjugated before calculating the dot product.
 
     """
 
@@ -228,13 +230,22 @@ def dpnp_dot(a, b, /, out=None):
         a = _copy_array(a, dep_events_list, host_tasks_list, dtype=dot_dtype)
         b = _copy_array(b, dep_events_list, host_tasks_list, dtype=dot_dtype)
         if dpnp.issubdtype(res_dtype, dpnp.complexfloating):
-            ht_ev, _ = bi._dotu(
-                exec_q,
-                dpnp.get_usm_ndarray(a),
-                dpnp.get_usm_ndarray(b),
-                dpnp.get_usm_ndarray(result),
-                dep_events_list,
-            )
+            if conjugate:
+                ht_ev, _ = bi._dotc(
+                    exec_q,
+                    dpnp.get_usm_ndarray(a),
+                    dpnp.get_usm_ndarray(b),
+                    dpnp.get_usm_ndarray(result),
+                    dep_events_list,
+                )
+            else:
+                ht_ev, _ = bi._dotu(
+                    exec_q,
+                    dpnp.get_usm_ndarray(a),
+                    dpnp.get_usm_ndarray(b),
+                    dpnp.get_usm_ndarray(result),
+                    dep_events_list,
+                )
         else:
             ht_ev, _ = bi._dot(
                 exec_q,
@@ -253,7 +264,7 @@ def dpnp_dot(a, b, /, out=None):
     if dot_dtype != res_dtype:
         result = result.astype(res_dtype, copy=False)
 
-    # NumPy does not allow casting even if it is safe
+    # numpy.dot does not allow casting even if it is safe
     return dpnp.get_result_array(result, out, casting="no")
 
 
@@ -447,74 +458,3 @@ def dpnp_matmul(
             return result
     else:
         return dpnp.get_result_array(result, out, casting=casting)
-
-
-def dpnp_vdot(a, b):
-    """
-    Return the dot product of two arrays.
-
-    The routine that is used to perform the main calculation
-    depends on input arrays data type: 1) For integer and boolean data types,
-    `dpctl.tensor.vecdot` form the Data Parallel Control library is used,
-    2) For real-valued floating point data types, `dot` routines from
-    BLAS library of OneMKL are used, and 3) For complex data types,
-    `dotc` routines from BLAS library of OneMKL are used.
-
-    """
-
-    if a.size != b.size:
-        raise ValueError(
-            "Input arrays have a mismatch in their size. "
-            f"(size {a.size} is different from {b.size})"
-        )
-
-    res_usm_type, exec_q = get_usm_allocations([a, b])
-
-    # Determine the appropriate data types
-    # casting is irrelevant here since dtype is `None`
-    dot_dtype, res_dtype = _op_res_dtype(
-        a, b, dtype=None, casting="no", sycl_queue=exec_q
-    )
-
-    # create result array
-    result = dpnp.empty(
-        (),
-        dtype=dot_dtype,
-        usm_type=res_usm_type,
-        sycl_queue=exec_q,
-    )
-
-    # input arrays should have the proper data type
-    dep_events_list = []
-    host_tasks_list = []
-    if dpnp.issubdtype(res_dtype, dpnp.inexact):
-        # copying is needed if dtypes of input arrays are different
-        a = _copy_array(a, dep_events_list, host_tasks_list, dtype=dot_dtype)
-        b = _copy_array(b, dep_events_list, host_tasks_list, dtype=dot_dtype)
-        if dpnp.issubdtype(res_dtype, dpnp.complexfloating):
-            ht_ev, _ = bi._dotc(
-                exec_q,
-                dpnp.get_usm_ndarray(a),
-                dpnp.get_usm_ndarray(b),
-                dpnp.get_usm_ndarray(result),
-                dep_events_list,
-            )
-        else:
-            ht_ev, _ = bi._dot(
-                exec_q,
-                dpnp.get_usm_ndarray(a),
-                dpnp.get_usm_ndarray(b),
-                dpnp.get_usm_ndarray(result),
-                dep_events_list,
-            )
-        host_tasks_list.append(ht_ev)
-        dpctl.SyclEvent.wait_for(host_tasks_list)
-    else:
-        dpt_a = dpnp.get_usm_ndarray(a)
-        dpt_b = dpnp.get_usm_ndarray(b)
-        result = dpnp_array._create_from_usm_ndarray(dpt.vecdot(dpt_a, dpt_b))
-
-    if dot_dtype != res_dtype:
-        result = result.astype(res_dtype, copy=False)
-
-    return result
diff --git a/tests/test_dot.py b/tests/test_dot.py
@@ -384,6 +384,10 @@ def test_vdot_scalar(self, dtype):
         expected = numpy.vdot(a, b)
         assert_allclose(result, expected)
 
+        result = dpnp.vdot(b, ia)
+        expected = numpy.vdot(b, a)
+        assert_allclose(result, expected)
+
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
     @pytest.mark.parametrize(
         "array_info",
@@ -505,3 +509,7 @@ def test_vdot_error(self):
         # The first array should be of size one
         with pytest.raises(ValueError):
             dpnp.vdot(a, b)
+
+        # The second array should be of size one
+        with pytest.raises(ValueError):
+            dpnp.vdot(b, a)