IntelPython
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
Lines changed: 1 addition & 1 deletion
diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py
Lines changed: 25 additions & 15 deletions
diff --git a/dpnp/tests/skipped_tests.tbl b/dpnp/tests/skipped_tests.tbl
Lines changed: 0 additions & 330 deletions
diff --git a/dpnp/tests/skipped_tests_gpu.tbl b/dpnp/tests/skipped_tests_gpu.tbl
Lines changed: 0 additions & 331 deletions
diff --git a/dpnp/tests/skipped_tests_gpu_no_fp64.tbl b/dpnp/tests/skipped_tests_gpu_no_fp64.tbl
Lines changed: 0 additions & 171 deletions
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
Lines changed: 17 additions & 14 deletions
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
Lines changed: 64 additions & 0 deletions
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_carray.py b/dpnp/tests/third_party/cupy/core_tests/test_carray.py
Lines changed: 101 additions & 0 deletions
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py b/dpnp/tests/third_party/cupy/core_tests/test_core.py
Lines changed: 3 additions & 0 deletions
@@ -150,7 +150,7 @@ def mT(self):
         if self.ndim < 2:
             raise ValueError("matrix transpose with ndim < 2 is undefined")
 
-        return self._array_obj.mT
+        return dpnp_array._create_from_usm_ndarray(self._array_obj.mT)
 
     def to_device(self, target_device):
         """Transfer array to target device."""
 
@@ -1022,25 +1022,31 @@ def power(a, size=None):
     return call_origin(numpy.random.power, a, size)
 
 
-def rand(d0, *dn, device=None, usm_type="device", sycl_queue=None):
+def rand(*args, device=None, usm_type="device", sycl_queue=None):
     """
     Random values in a given shape.
 
-    Create an array of the given shape and populate it with random samples
-    from a uniform distribution over [0, 1).
+    Create an array of the given shape and populate it with random samples from
+    a uniform distribution over ``[0, 1)``.
 
     For full documentation refer to :obj:`numpy.random.rand`.
 
     Parameters
     ----------
+    d0, d1, ..., dn : int, optional
+        The dimensions of the returned array, must be non-negative.
+        If no argument is given a single Python float is returned.
     device : {None, string, SyclDevice, SyclQueue}, optional
         An array API concept of device where the output array is created.
-        The `device` can be ``None`` (the default), an OneAPI filter selector string,
-        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
-        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        The `device` can be ``None`` (the default), an OneAPI filter selector
+        string, an instance of :class:`dpctl.SyclDevice` corresponding to
+        a non-partitioned SYCL device, an instance of :class:`dpctl.SyclQueue`,
+        or a `Device` object returned by
         :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+        Default: ``None``.
     usm_type : {"device", "shared", "host"}, optional
         The type of SYCL USM allocation for the output array.
+        Default: ``"device"``.
     sycl_queue : {None, SyclQueue}, optional
         A SYCL queue to use for output array allocation and copying. The
         `sycl_queue` can be passed as ``None`` (the default), which means
@@ -1051,23 +1057,27 @@ def rand(d0, *dn, device=None, usm_type="device", sycl_queue=None):
     Returns
     -------
     out : dpnp.ndarray
-        Random values in a given shape.
-        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
+        Random values in a given shape ``(d0, d1, ..., dn)``.
+        Output array data type is :obj:`dpnp.float64` if a device supports it,
+        or :obj:`dpnp.float32` type otherwise.
 
-    Examples
+    See Also
     --------
-    >>> s = dpnp.random.rand(3, 2)
+    :obj:`dpnp.random.random` : Return random floats in the half-open interval
+                                ``[0.0, 1.0)``.
+    :obj:`dpnp.random.random_sample` : Return random floats in the half-open
+                                       interval ``[0.0, 1.0)``.
+    :obj:`dpnp.random.uniform` : Draw samples from a uniform distribution.
 
-    See Also
+    Examples
     --------
-    :obj:`dpnp.random.random`
-    :obj:`dpnp.random.random_sample`
-    :obj:`dpnp.random.uniform`
+    >>> import dpnp as np
+    >>> s = np.random.rand(3, 2)
 
     """
 
     rs = _get_random_state(device=device, sycl_queue=sycl_queue)
-    return rs.rand(d0, *dn, usm_type=usm_type)
+    return rs.rand(*args, usm_type=usm_type)
 
 
 def randint(
 
@@ -1078,24 +1078,27 @@ def test_vecdot(device, shape_pair):
 
 
 @pytest.mark.parametrize(
-    "func, kwargs",
+    "func, args, kwargs",
     [
-        pytest.param("normal", {"loc": 1.0, "scale": 3.4, "size": (5, 12)}),
-        pytest.param("rand", {"d0": 20}),
+        pytest.param("normal", [], {"loc": 1.0, "scale": 3.4, "size": (5, 12)}),
+        pytest.param("rand", [20], {}),
         pytest.param(
             "randint",
+            [],
             {"low": 2, "high": 15, "size": (4, 8, 16), "dtype": dpnp.int32},
         ),
-        pytest.param("randn", {"d0": 20}),
-        pytest.param("random", {"size": (35, 45)}),
+        pytest.param("randn", [], {"d0": 20}),
+        pytest.param("random", [], {"size": (35, 45)}),
+        pytest.param(
+            "random_integers", [], {"low": -17, "high": 3, "size": (12, 16)}
+        ),
+        pytest.param("random_sample", [], {"size": (7, 7)}),
+        pytest.param("ranf", [], {"size": (10, 7, 12)}),
+        pytest.param("sample", [], {"size": (7, 9)}),
+        pytest.param("standard_normal", [], {"size": (4, 4, 8)}),
         pytest.param(
-            "random_integers", {"low": -17, "high": 3, "size": (12, 16)}
+            "uniform", [], {"low": 1.0, "high": 2.0, "size": (4, 2, 5)}
         ),
-        pytest.param("random_sample", {"size": (7, 7)}),
-        pytest.param("ranf", {"size": (10, 7, 12)}),
-        pytest.param("sample", {"size": (7, 9)}),
-        pytest.param("standard_normal", {"size": (4, 4, 8)}),
-        pytest.param("uniform", {"low": 1.0, "high": 2.0, "size": (4, 2, 5)}),
     ],
 )
 @pytest.mark.parametrize(
@@ -1104,11 +1107,11 @@ def test_vecdot(device, shape_pair):
     ids=[device.filter_string for device in valid_devices],
 )
 @pytest.mark.parametrize("usm_type", ["host", "device", "shared"])
-def test_random(func, kwargs, device, usm_type):
+def test_random(func, args, kwargs, device, usm_type):
     kwargs = {**kwargs, "device": device, "usm_type": usm_type}
 
     # test with default SYCL queue per a device
-    res_array = getattr(dpnp.random, func)(**kwargs)
+    res_array = getattr(dpnp.random, func)(*args, **kwargs)
     assert device == res_array.sycl_device
     assert usm_type == res_array.usm_type
 
@@ -1120,7 +1123,7 @@ def test_random(func, kwargs, device, usm_type):
     kwargs["sycl_queue"] = sycl_queue
 
     # test with in-order SYCL queue per a device and passed as argument
-    res_array = getattr(dpnp.random, func)(**kwargs)
+    res_array = getattr(dpnp.random, func)(*args, **kwargs)
     assert usm_type == res_array.usm_type
     assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue)
 
 
@@ -0,0 +1,64 @@
+import unittest
+
+import numpy
+import pytest
+
+import dpnp as cupy
+from dpnp.tests.third_party.cupy import testing
+
+pytest.skip(
+    "__array_function__ protocol is not supported", allow_module_level=True
+)
+
+
+# Checks that plain NumPy functions accept device arrays through the
+# __array_function__ protocol. `cupy` in this file is an alias for `dpnp`.
+# The whole module is skipped at import time by the module-level pytest.skip
+# call, so none of these tests currently run.
+class TestArrayFunction(unittest.TestCase):
+
+    # numpy.linalg.qr is applied to a NumPy array and to the device copy of
+    # the same data; dtypes must match and values must agree within atol=1e-4.
+    @testing.with_requires("numpy>=1.17.0")
+    def test_array_function(self):
+        a = numpy.random.randn(100, 100)
+        a_cpu = numpy.asarray(a)
+        a_gpu = cupy.asarray(a)
+
+        # The numpy call for both CPU and GPU arrays is intentional to test the
+        # __array_function__ protocol
+        qr_cpu = numpy.linalg.qr(a_cpu)
+        qr_gpu = numpy.linalg.qr(a_gpu)
+
+        # The result is handled both as a tuple of factors and as a single
+        # array (presumably this depends on the NumPy version -- TODO confirm).
+        if isinstance(qr_cpu, tuple):
+            for b_cpu, b_gpu in zip(qr_cpu, qr_gpu):
+                assert b_cpu.dtype == b_gpu.dtype
+                testing.assert_allclose(b_cpu, b_gpu, atol=1e-4)
+        else:
+            assert qr_cpu.dtype == qr_gpu.dtype
+            testing.assert_allclose(qr_cpu, qr_gpu, atol=1e-4)
+
+    # Same idea as above, using the reduction numpy.sum(..., axis=1).
+    @testing.with_requires("numpy>=1.17.0")
+    def test_array_function2(self):
+        a = numpy.random.randn(100, 100)
+        a_cpu = numpy.asarray(a)
+        a_gpu = cupy.asarray(a)
+
+        # The numpy call for both CPU and GPU arrays is intentional to test the
+        # __array_function__ protocol
+        out_cpu = numpy.sum(a_cpu, axis=1)
+        out_gpu = numpy.sum(a_gpu, axis=1)
+
+        assert out_cpu.dtype == out_gpu.dtype
+        testing.assert_allclose(out_cpu, out_gpu, atol=1e-4)
+
+    # The remaining tests return the value of a NumPy type-introspection call
+    # made on an `xp` array; `xp` is presumably supplied by
+    # testing.numpy_cupy_equal, which compares the results obtained with
+    # each array module -- TODO confirm against the testing helpers.
+    @testing.with_requires("numpy>=1.17.0")
+    @testing.numpy_cupy_equal()
+    def test_array_function_can_cast(self, xp):
+        return numpy.can_cast(xp.arange(2), "f4")
+
+    @testing.with_requires("numpy>=1.17.0")
+    @testing.numpy_cupy_equal()
+    def test_array_function_common_type(self, xp):
+        return numpy.common_type(
+            xp.arange(2, dtype="f8"), xp.arange(2, dtype="f4")
+        )
+
+    @testing.with_requires("numpy>=1.17.0")
+    @testing.numpy_cupy_equal()
+    def test_array_function_result_type(self, xp):
+        return numpy.result_type(3, xp.arange(2, dtype="f8"))
@@ -0,0 +1,101 @@
+import unittest
+
+import pytest
+
+import dpnp as cupy
+from dpnp.tests.third_party.cupy import testing
+
+pytest.skip("CArray is not supported", allow_module_level=True)
+
+
+# Exercises the accessors available on a `raw` array argument inside an
+# ElementwiseKernel body (size(), shape(), strides(), and indexing).
+# `cupy` in this file is an alias for `dpnp`; the whole module is skipped at
+# import time by the module-level pytest.skip call, so these tests do not run.
+class TestCArray(unittest.TestCase):
+
+    # x.size() inside the kernel must report the 3 elements of the input.
+    def test_size(self):
+        x = cupy.arange(3).astype("i")
+        y = cupy.ElementwiseKernel(
+            "raw int32 x",
+            "int32 y",
+            "y = x.size()",
+            "test_carray_size",
+        )(x, size=1)
+        assert int(y[0]) == 3
+
+    # The kernel is launched with size=2, so output element i receives
+    # x.shape()[i]; the expected result is the input shape (2, 3).
+    def test_shape(self):
+        x = cupy.arange(6).reshape((2, 3)).astype("i")
+        y = cupy.ElementwiseKernel(
+            "raw int32 x",
+            "int32 y",
+            "y = x.shape()[i]",
+            "test_carray_shape",
+        )(x, size=2)
+        testing.assert_array_equal(y, (2, 3))
+
+    # Expected strides (12, 4) are byte strides of a (2, 3) array of
+    # 4-byte elements: 3 * 4 bytes per row, 4 bytes per element.
+    def test_strides(self):
+        x = cupy.arange(6).reshape((2, 3)).astype("i")
+        y = cupy.ElementwiseKernel(
+            "raw int32 x",
+            "int32 y",
+            "y = x.strides()[i]",
+            "test_carray_strides",
+        )(x, size=2)
+        testing.assert_array_equal(y, (12, 4))
+
+    # Indexing the raw array with the flat index i must reproduce x.
+    def test_getitem_int(self):
+        x = cupy.arange(24).reshape((2, 3, 4)).astype("i")
+        y = cupy.empty_like(x)
+        y = cupy.ElementwiseKernel(
+            "raw T x",
+            "int32 y",
+            "y = x[i]",
+            "test_carray_getitem_int",
+        )(x, y)
+        testing.assert_array_equal(y, x)
+
+    # Indexing with a multi-dimensional index: the kernel splits the flat
+    # index i into (i / 12, i / 4 % 3, i % 4), the 3-D coordinates for the
+    # (2, 3, 4) shape, and the result must again reproduce x.
+    def test_getitem_idx(self):
+        x = cupy.arange(24).reshape((2, 3, 4)).astype("i")
+        y = cupy.empty_like(x)
+        y = cupy.ElementwiseKernel(
+            "raw T x",
+            "int32 y",
+            "ptrdiff_t idx[] = {i / 12, i / 4 % 3, i % 4}; y = x[idx]",
+            "test_carray_getitem_idx",
+        )(x, y)
+        testing.assert_array_equal(y, x)
+
+
+# The parameterized sizes sit just below, at, and just above 2**31 and 2**32.
+# `self.size` read in the tests is presumably injected by
+# testing.parameterize -- TODO confirm against the testing helpers.
+@testing.parameterize(
+    {"size": 2**31 - 1024},
+    {"size": 2**31},
+    {"size": 2**31 + 1024},
+    {"size": 2**32 - 1024},
+    {"size": 2**32},
+    {"size": 2**32 + 1024},
+)
+@testing.slow
+class TestCArray32BitBoundary(unittest.TestCase):
+    # This test case is intended to confirm that CArray indexing works
+    # correctly with input/output arrays whose size is so large that it
+    # crosses the 32-bit boundary (in terms of both number of elements and
+    # size in bytes).
+    # This test requires approx. 8 GiB GPU memory to run.
+    # See https://github.com/cupy/cupy/pull/882 for detailed discussions.
+    #
+    # NOTE(review): the skipIf decorators below evaluate
+    # `cupy.cuda.runtime.is_hip` when the class body is executed, and `cupy`
+    # is an alias for dpnp in this file. This is only reached if the
+    # module-level pytest.skip call is removed -- verify the attribute exists
+    # before enabling this module.
+
+    def tearDown(self):
+        # Free huge memory for slow test
+        cupy.get_default_memory_pool().free_all_blocks()
+
+    # HIP is known to fail with sizes > 2**32-1024
+    @unittest.skipIf(cupy.cuda.runtime.is_hip, "HIP does not support this")
+    def test(self):
+        # Elementwise
+        a = cupy.full((1, self.size), 7, dtype=cupy.int8)
+        # Reduction
+        # Summing over axis 0 (length 1) leaves `size` elements equal to 7,
+        # so the grand total checked below is size * 7.
+        result = a.sum(axis=0, dtype=cupy.int8)
+        # Explicitly specify the dtype to absorb Linux/Windows difference.
+        assert result.sum(dtype=cupy.int64) == self.size * 7
+
+    # HIP is known to fail with sizes > 2**32-1024
+    @unittest.skipIf(cupy.cuda.runtime.is_hip, "HIP does not support this")
+    def test_assign(self):
+        # Writes to the last element of a zero-filled array; the total must
+        # then be exactly 1.
+        a = cupy.zeros(self.size, dtype=cupy.int8)
+        a[-1] = 1.0
+        assert a.sum() == 1
@@ -9,6 +9,7 @@
 
 
 class TestSize(unittest.TestCase):
+
     # def tearDown(self):
     #     # Free huge memory for slow test
     #     cupy.get_default_memory_pool().free_all_blocks()
@@ -51,6 +52,7 @@ def test_size_huge(self, xp):
 
 @pytest.mark.skip("no cupy._core submodule")
 class TestOrder(unittest.TestCase):
+
     @testing.for_orders(_orders.keys())
     def test_ndarray(self, order):
         order_expect = _orders[order]
@@ -101,6 +103,7 @@ def test_cupy_ndarray(self, dtype):
 )
 @pytest.mark.skip("compiling cupy headers are not supported")
 class TestCuPyHeaders(unittest.TestCase):
+
     def setUp(self):
         self.temporary_cache_dir_context = test_raw.use_temporary_cache_dir()
         self.cache_dir = self.temporary_cache_dir_context.__enter__()