Skip to content

Merge from master to gold to pick-up fixes #1170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 29 additions & 18 deletions dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,9 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref,
(void)dep_event_vec_ref;

DPCTLSyclEventRef event_ref = nullptr;
DPCTLSyclEventRef e1_ref = nullptr;
DPCTLSyclEventRef e2_ref = nullptr;
DPCTLSyclEventRef e3_ref = nullptr;

if ((input1_in == nullptr) || (result1_out == nullptr))
{
Expand All @@ -514,29 +517,36 @@ DPCTLSyclEventRef dpnp_ptp_c(DPCTLSyclQueueRef q_ref,
_DataType* min_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q));
_DataType* max_arr = reinterpret_cast<_DataType*>(sycl::malloc_shared(result_size * sizeof(_DataType), q));

dpnp_min_c<_DataType>(arr, min_arr, result_size, input_shape, input_ndim, axis, naxis);
dpnp_max_c<_DataType>(arr, max_arr, result_size, input_shape, input_ndim, axis, naxis);
e1_ref = dpnp_min_c<_DataType>(q_ref, arr, min_arr, result_size, input_shape, input_ndim, axis, naxis, NULL);
e2_ref = dpnp_max_c<_DataType>(q_ref, arr, max_arr, result_size, input_shape, input_ndim, axis, naxis, NULL);

shape_elem_type* _strides =
reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(result_ndim * sizeof(shape_elem_type), q));
get_shape_offsets_inkernel(result_shape, result_ndim, _strides);

dpnp_subtract_c<_DataType, _DataType, _DataType>(result,
result_size,
result_ndim,
result_shape,
result_strides,
max_arr,
result_size,
result_ndim,
result_shape,
_strides,
min_arr,
result_size,
result_ndim,
result_shape,
_strides,
NULL);
e3_ref = dpnp_subtract_c<_DataType, _DataType, _DataType>(q_ref, result,
result_size,
result_ndim,
result_shape,
result_strides,
max_arr,
result_size,
result_ndim,
result_shape,
_strides,
min_arr,
result_size,
result_ndim,
result_shape,
_strides,
NULL, NULL);

DPCTLEvent_Wait(e1_ref);
DPCTLEvent_Wait(e2_ref);
DPCTLEvent_Wait(e3_ref);
DPCTLEvent_Delete(e1_ref);
DPCTLEvent_Delete(e2_ref);
DPCTLEvent_Delete(e3_ref);

sycl::free(min_arr, q);
sycl::free(max_arr, q);
Expand Down Expand Up @@ -576,6 +586,7 @@ void dpnp_ptp_c(void* result1_out,
naxis,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType>
Expand Down
24 changes: 16 additions & 8 deletions dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,16 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
\
sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref)); \
\
DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \
DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \
DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \
DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size); \
DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \
DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \
\
DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \
DPNPC_ptr_adapter<shape_elem_type> input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \
DPNPC_ptr_adapter<shape_elem_type> input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \
DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size); \
DPNPC_ptr_adapter<shape_elem_type> input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \
DPNPC_ptr_adapter<shape_elem_type> input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \
\
DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \
DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim); \
DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true); \
DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim); \
\
_DataType* input1_data = input1_ptr.get_ptr(); \
shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \
Expand Down Expand Up @@ -226,6 +226,14 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
}; \
event = q.submit(kernel_func); \
} \
input1_ptr.depends_on(event); \
input1_shape_ptr.depends_on(event); \
input1_strides_ptr.depends_on(event); \
input2_ptr.depends_on(event); \
input2_shape_ptr.depends_on(event); \
input2_strides_ptr.depends_on(event); \
result_ptr.depends_on(event); \
result_strides_ptr.depends_on(event); \
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event); \
\
return DPCTLEvent_Copy(event_ref); \
Expand Down
23 changes: 23 additions & 0 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@
} \
} \
\
input1_ptr.depends_on(event); \
input1_shape_ptr.depends_on(event); \
input1_strides_ptr.depends_on(event); \
result_ptr.depends_on(event); \
result_strides_ptr.depends_on(event); \
\
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event); \
\
return DPCTLEvent_Copy(event_ref); \
Expand Down Expand Up @@ -644,6 +650,12 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
} \
} \
\
input1_ptr.depends_on(event); \
input1_shape_ptr.depends_on(event); \
input1_strides_ptr.depends_on(event); \
result_ptr.depends_on(event); \
result_strides_ptr.depends_on(event); \
\
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event); \
\
return DPCTLEvent_Copy(event_ref); \
Expand Down Expand Up @@ -998,6 +1010,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
event = q.submit(kernel_func); \
} \
} \
\
input1_ptr.depends_on(event); \
input1_shape_ptr.depends_on(event); \
input1_strides_ptr.depends_on(event); \
input2_ptr.depends_on(event); \
input2_shape_ptr.depends_on(event); \
input2_strides_ptr.depends_on(event); \
result_ptr.depends_on(event); \
result_shape_ptr.depends_on(event); \
result_strides_ptr.depends_on(event); \
\
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event); \
\
return DPCTLEvent_Copy(event_ref); \
Expand Down
2 changes: 1 addition & 1 deletion dpnp/backend/kernels/dpnp_krnl_indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref,
DPCTLSyclEventRef event_ref = nullptr;
sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));

DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size, true);
DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, array1_size);
DPNPC_ptr_adapter<_IndecesType> input2_ptr(q_ref, indices1, size);
_DataType* array_1 = input1_ptr.get_ptr();
_IndecesType* indices = input2_ptr.get_ptr();
Expand Down
8 changes: 8 additions & 0 deletions dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref,
event = q.submit(kernel_func);
}

input1_ptr.depends_on(event);
result1_ptr.depends_on(event);
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);

return DPCTLEvent_Copy(event_ref);
Expand Down Expand Up @@ -483,6 +485,8 @@ DPCTLSyclEventRef dpnp_ediff1d_c(DPCTLSyclQueueRef q_ref,
};
event = q.submit(kernel_func);

input1_ptr.depends_on(event);
result_ptr.depends_on(event);
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);

return DPCTLEvent_Copy(event_ref);
Expand Down Expand Up @@ -676,6 +680,7 @@ void dpnp_floor_divide_c(void* result_out,
where,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
Expand Down Expand Up @@ -770,6 +775,7 @@ void dpnp_modf_c(void* array1_in, void* result1_out, void* result2_out, size_t s
size,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_input, typename _DataType_output>
Expand Down Expand Up @@ -911,6 +917,7 @@ void dpnp_remainder_c(void* result_out,
where,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
Expand Down Expand Up @@ -1041,6 +1048,7 @@ void dpnp_trapz_c(
array2_size,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_input1, typename _DataType_input2, typename _DataType_output>
Expand Down
32 changes: 8 additions & 24 deletions dpnp/backend/kernels/dpnp_krnl_random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1674,23 +1674,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref,
// Fast, statically typed path: shuffle the underlying buffer.
// Only for non-empty, 1d objects of class ndarray (subclasses such
// as MaskedArrays may not support this approach).
char* buf = reinterpret_cast<char*>(sycl::malloc_shared(itemsize * sizeof(char), q));
void* buf = sycl::malloc_device(itemsize, q);
for (size_t i = uvec_size; i > 0; i--)
{
size_t j = (size_t)(floor((i + 1) * Uvec[i - 1]));
if (i != j)
{
auto memcpy1 =
q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * itemsize, itemsize); });
auto memcpy2 = q.submit([&](sycl::handler& h) {
h.depends_on({memcpy1});
h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize);
});
auto memcpy3 = q.submit([&](sycl::handler& h) {
h.depends_on({memcpy2});
h.memcpy(result1 + i * itemsize, buf, itemsize);
});
memcpy3.wait();
auto memcpy1 = q.memcpy(buf, result1 + j * itemsize, itemsize);
auto memcpy2 = q.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize, memcpy1);
q.memcpy(result1 + i * itemsize, buf, itemsize, memcpy2).wait();
}
}
sycl::free(buf, q);
Expand All @@ -1699,23 +1691,15 @@ DPCTLSyclEventRef dpnp_rng_shuffle_c(DPCTLSyclQueueRef q_ref,
{
// Multidimensional ndarrays require a bounce buffer.
size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element
char* buf = reinterpret_cast<char*>(sycl::malloc_shared(step_size * sizeof(char), q));
void* buf = sycl::malloc_device(step_size, q);
for (size_t i = uvec_size; i > 0; i--)
{
size_t j = (size_t)(floor((i + 1) * Uvec[i - 1]));
if (j < i)
{
auto memcpy1 =
q.submit([&](sycl::handler& h) { h.memcpy(buf, result1 + j * step_size, step_size); });
auto memcpy2 = q.submit([&](sycl::handler& h) {
h.depends_on({memcpy1});
h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size);
});
auto memcpy3 = q.submit([&](sycl::handler& h) {
h.depends_on({memcpy2});
h.memcpy(result1 + i * step_size, buf, step_size);
});
memcpy3.wait();
auto memcpy1 = q.memcpy(buf, result1 + j * step_size, step_size);
auto memcpy2 = q.memcpy(result1 + j * step_size, result1 + i * step_size, step_size, memcpy1);
q.memcpy(result1 + i * step_size, buf, step_size, memcpy2).wait();
}
}
sycl::free(buf, q);
Expand Down
2 changes: 2 additions & 0 deletions dpnp/backend/kernels/dpnp_krnl_reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ void dpnp_sum_c(void* result_out,
where,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_output, typename _DataType_input>
Expand Down Expand Up @@ -278,6 +279,7 @@ void dpnp_prod_c(void* result_out,
where,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType_output, typename _DataType_input>
Expand Down
Loading