@@ -1674,23 +1674,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref,
1674
1674
// Fast, statically typed path: shuffle the underlying buffer.
1675
1675
// Only for non-empty, 1d objects of class ndarray (subclasses such
1676
1676
// as MaskedArrays may not support this approach).
1677
- char * buf = reinterpret_cast < char *>( sycl::malloc_shared (itemsize * sizeof ( char ) , q) );
1677
+ void * buf = sycl::malloc_device (itemsize, q);
1678
1678
for (size_t i = uvec_size; i > 0 ; i--)
1679
1679
{
1680
1680
size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
1681
1681
if (i != j)
1682
1682
{
1683
- auto memcpy1 =
1684
- q.submit ([&](sycl::handler& h) { h.memcpy (buf, result1 + j * itemsize, itemsize); });
1685
- auto memcpy2 = q.submit ([&](sycl::handler& h) {
1686
- h.depends_on ({memcpy1});
1687
- h.memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize);
1688
- });
1689
- auto memcpy3 = q.submit ([&](sycl::handler& h) {
1690
- h.depends_on ({memcpy2});
1691
- h.memcpy (result1 + i * itemsize, buf, itemsize);
1692
- });
1693
- memcpy3.wait ();
1683
+ auto memcpy1 = q.memcpy (buf, result1 + j * itemsize, itemsize);
1684
+ auto memcpy2 = q.memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize, memcpy1);
1685
+ q.memcpy (result1 + i * itemsize, buf, itemsize, memcpy2).wait ();
1694
1686
}
1695
1687
}
1696
1688
sycl::free (buf, q);
@@ -1699,23 +1691,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref,
1699
1691
{
1700
1692
// Multidimensional ndarrays require a bounce buffer.
1701
1693
size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element
1702
- char * buf = reinterpret_cast < char *>( sycl::malloc_shared (step_size * sizeof ( char ) , q) );
1694
+ void * buf = sycl::malloc_device (step_size, q);
1703
1695
for (size_t i = uvec_size; i > 0 ; i--)
1704
1696
{
1705
1697
size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
1706
1698
if (j < i)
1707
1699
{
1708
- auto memcpy1 =
1709
- q.submit ([&](sycl::handler& h) { h.memcpy (buf, result1 + j * step_size, step_size); });
1710
- auto memcpy2 = q.submit ([&](sycl::handler& h) {
1711
- h.depends_on ({memcpy1});
1712
- h.memcpy (result1 + j * step_size, result1 + i * step_size, step_size);
1713
- });
1714
- auto memcpy3 = q.submit ([&](sycl::handler& h) {
1715
- h.depends_on ({memcpy2});
1716
- h.memcpy (result1 + i * step_size, buf, step_size);
1717
- });
1718
- memcpy3.wait ();
1700
+ auto memcpy1 = q.memcpy (buf, result1 + j * step_size, step_size);
1701
+ auto memcpy2 = q.memcpy (result1 + j * step_size, result1 + i * step_size, step_size, memcpy1);
1702
+ q.memcpy (result1 + i * step_size, buf, step_size, memcpy2).wait ();
1719
1703
}
1720
1704
}
1721
1705
sycl::free (buf, q);
0 commit comments