Skip to content

Pass queue to iterator #1130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,17 +906,19 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
{ \
DPNPC_id<_DataType_input1>* input1_it; \
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(sycl::malloc_shared(input1_it_size_in_bytes, q));\
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, \
input1_it_size_in_bytes)); \
new (input1_it) \
DPNPC_id<_DataType_input1>(input1_data, input1_shape_data, input1_strides_data, input1_ndim); \
DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim); \
\
input1_it->broadcast_to_shape(result_shape_data, result_ndim); \
\
DPNPC_id<_DataType_input2>* input2_it; \
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(sycl::malloc_shared(input2_it_size_in_bytes, q));\
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, \
input2_it_size_in_bytes)); \
new (input2_it) \
DPNPC_id<_DataType_input2>(input2_data, input2_shape_data, input2_strides_data, input2_ndim); \
DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim); \
\
input2_it->broadcast_to_shape(result_shape_data, result_ndim); \
\
Expand Down
16 changes: 8 additions & 8 deletions dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,15 +587,15 @@ DPCTLSyclEventRef dpnp_floor_divide_c(DPCTLSyclQueueRef q_ref,

DPNPC_id<_DataType_input1>* input1_it;
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(sycl::malloc_shared(input1_it_size_in_bytes, q));
new (input1_it) DPNPC_id<_DataType_input1>(input1_data, input1_shape, input1_shape_ndim);
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));
new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_shape_ndim);

input1_it->broadcast_to_shape(result_shape);

DPNPC_id<_DataType_input2>* input2_it;
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(sycl::malloc_shared(input2_it_size_in_bytes, q));
new (input2_it) DPNPC_id<_DataType_input2>(input2_data, input2_shape, input2_shape_ndim);
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));
new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_shape_ndim);

input2_it->broadcast_to_shape(result_shape);

Expand Down Expand Up @@ -823,15 +823,15 @@ DPCTLSyclEventRef dpnp_remainder_c(DPCTLSyclQueueRef q_ref,

DPNPC_id<_DataType_input1>* input1_it;
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>);
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(sycl::malloc_shared(input1_it_size_in_bytes, q));
new (input1_it) DPNPC_id<_DataType_input1>(input1_data, input1_shape, input1_shape_ndim);
input1_it = reinterpret_cast<DPNPC_id<_DataType_input1>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes));
new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_shape_ndim);

input1_it->broadcast_to_shape(result_shape);

DPNPC_id<_DataType_input2>* input2_it;
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>);
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(sycl::malloc_shared(input2_it_size_in_bytes, q));
new (input2_it) DPNPC_id<_DataType_input2>(input2_data, input2_shape, input2_shape_ndim);
input2_it = reinterpret_cast<DPNPC_id<_DataType_input2>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes));
new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_shape_ndim);

input2_it->broadcast_to_shape(result_shape);

Expand Down
4 changes: 2 additions & 2 deletions dpnp/backend/kernels/dpnp_krnl_reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ DPCTLSyclEventRef dpnp_sum_c(DPCTLSyclQueueRef q_ref,
}
}

DPNPC_id<_DataType_input> input_it(input, input_shape, input_shape_ndim);
DPNPC_id<_DataType_input> input_it(q_ref, input, input_shape, input_shape_ndim);
input_it.set_axes(axes, axes_ndim);

const size_t output_size = input_it.get_output_size();
Expand Down Expand Up @@ -235,7 +235,7 @@ DPCTLSyclEventRef dpnp_prod_c(DPCTLSyclQueueRef q_ref,
return event_ref;
}

DPNPC_id<_DataType_input> input_it(input, input_shape, input_shape_ndim);
DPNPC_id<_DataType_input> input_it(q_ref, input, input_shape, input_shape_ndim);
input_it.set_axes(axes, axes_ndim);

const size_t output_size = input_it.get_output_size();
Expand Down
59 changes: 38 additions & 21 deletions dpnp/backend/src/dpnp_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,20 @@ class DPNPC_id final
using reference = value_type&;
using size_type = shape_elem_type;

DPNPC_id(pointer __ptr, const size_type* __shape, const size_type __shape_size)
DPNPC_id(DPCTLSyclQueueRef q_ref, pointer __ptr, const size_type* __shape, const size_type __shape_size)
{
queue_ref = q_ref;
std::vector<size_type> shape(__shape, __shape + __shape_size);
init_container(__ptr, shape);
}

DPNPC_id(pointer __ptr, const size_type* __shape, const size_type* __strides, const size_type __ndim)
DPNPC_id(DPCTLSyclQueueRef q_ref,
pointer __ptr,
const size_type* __shape,
const size_type* __strides,
const size_type __ndim)
{
queue_ref = q_ref;
std::vector<size_type> shape(__shape, __shape + __ndim);
std::vector<size_type> strides(__strides, __strides + __ndim);
init_container(__ptr, shape, strides);
Expand All @@ -223,12 +229,14 @@ class DPNPC_id final
*
* @note this function is designed for non-SYCL environment execution
*
* @param [in] q_ref Reference to SYCL queue.
* @param [in] __ptr Pointer to input data. Used to get values only.
* @param [in] __shape Shape of data provided by @ref __ptr.
* Empty container means scalar value pointed by @ref __ptr.
*/
DPNPC_id(pointer __ptr, const std::vector<size_type>& __shape)
DPNPC_id(DPCTLSyclQueueRef q_ref, pointer __ptr, const std::vector<size_type>& __shape)
{
queue_ref = q_ref;
init_container(__ptr, __shape);
}

Expand Down Expand Up @@ -296,7 +304,7 @@ class DPNPC_id final

output_shape_size = __shape.size();
const size_type output_shape_size_in_bytes = output_shape_size * sizeof(size_type);
output_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(output_shape_size_in_bytes));
output_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));

for (int irit = input_shape_size - 1, orit = output_shape_size - 1; orit >= 0; --irit, --orit)
{
Expand All @@ -311,13 +319,15 @@ class DPNPC_id final

broadcast_axes_size = valid_axes.size();
const size_type broadcast_axes_size_in_bytes = broadcast_axes_size * sizeof(size_type);
broadcast_axes = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(broadcast_axes_size_in_bytes));
broadcast_axes = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
broadcast_axes_size_in_bytes));
std::copy(valid_axes.begin(), valid_axes.end(), broadcast_axes);

output_size = std::accumulate(
output_shape, output_shape + output_shape_size, size_type(1), std::multiplies<size_type>());

output_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(output_shape_size_in_bytes));
output_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
output_shape_size_in_bytes));
get_shape_offsets_inkernel<size_type>(output_shape, output_shape_size, output_shape_strides);

iteration_size = 1;
Expand Down Expand Up @@ -392,7 +402,7 @@ class DPNPC_id final
const size_type iteration_shape_size_in_bytes = iteration_shape_size * sizeof(size_type);
std::vector<size_type> iteration_shape;

output_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(output_shape_size_in_bytes));
output_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));
size_type* output_shape_it = output_shape;
for (size_type i = 0; i < input_shape_size; ++i)
{
Expand All @@ -406,7 +416,8 @@ class DPNPC_id final
output_size = std::accumulate(
output_shape, output_shape + output_shape_size, size_type(1), std::multiplies<size_type>());

output_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(output_shape_size_in_bytes));
output_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
output_shape_size_in_bytes));
get_shape_offsets_inkernel<size_type>(output_shape, output_shape_size, output_shape_strides);

iteration_size = 1;
Expand All @@ -418,11 +429,13 @@ class DPNPC_id final
iteration_size *= axis_dim;
}

iteration_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(iteration_shape_size_in_bytes));
iteration_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
iteration_shape_size_in_bytes));
get_shape_offsets_inkernel<size_type>(
iteration_shape.data(), iteration_shape.size(), iteration_shape_strides);

axes_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(iteration_shape_size_in_bytes));
axes_shape_strides = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
iteration_shape_size_in_bytes));
for (size_t i = 0; i < iteration_shape_size; ++i)
{
axes_shape_strides[i] = input_shape_strides[axes[i]];
Expand Down Expand Up @@ -490,11 +503,12 @@ class DPNPC_id final
}

input_shape_size = __shape.size();
input_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(input_shape_size * sizeof(size_type)));
input_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
input_shape_size * sizeof(size_type)));
std::copy(__shape.begin(), __shape.end(), input_shape);

input_shape_strides =
reinterpret_cast<size_type*>(dpnp_memory_alloc_c(input_shape_size * sizeof(size_type)));
reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref, input_shape_size * sizeof(size_type)));
get_shape_offsets_inkernel<size_type>(input_shape, input_shape_size, input_shape_strides);
}
iteration_size = input_size;
Expand Down Expand Up @@ -525,11 +539,12 @@ class DPNPC_id final
}

input_shape_size = __shape.size();
input_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(input_shape_size * sizeof(size_type)));
input_shape = reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref,
input_shape_size * sizeof(size_type)));
std::copy(__shape.begin(), __shape.end(), input_shape);

input_shape_strides =
reinterpret_cast<size_type*>(dpnp_memory_alloc_c(input_shape_size * sizeof(size_type)));
reinterpret_cast<size_type*>(dpnp_memory_alloc_c(queue_ref, input_shape_size * sizeof(size_type)));
std::copy(__strides.begin(), __strides.end(), input_shape_strides);
}
iteration_size = input_size;
Expand Down Expand Up @@ -583,23 +598,23 @@ class DPNPC_id final
void free_axes_memory()
{
axes.clear();
dpnp_memory_free_c(axes_shape_strides);
dpnp_memory_free_c(queue_ref, axes_shape_strides);
axes_shape_strides = nullptr;
}

void free_broadcast_axes_memory()
{
broadcast_axes_size = size_type{};
dpnp_memory_free_c(broadcast_axes);
dpnp_memory_free_c(queue_ref, broadcast_axes);
broadcast_axes = nullptr;
}

void free_input_memory()
{
input_size = size_type{};
input_shape_size = size_type{};
dpnp_memory_free_c(input_shape);
dpnp_memory_free_c(input_shape_strides);
dpnp_memory_free_c(queue_ref, input_shape);
dpnp_memory_free_c(queue_ref, input_shape_strides);
input_shape = nullptr;
input_shape_strides = nullptr;
}
Expand All @@ -608,16 +623,16 @@ class DPNPC_id final
{
iteration_size = size_type{};
iteration_shape_size = size_type{};
dpnp_memory_free_c(iteration_shape_strides);
dpnp_memory_free_c(queue_ref, iteration_shape_strides);
iteration_shape_strides = nullptr;
}

void free_output_memory()
{
output_size = size_type{};
output_shape_size = size_type{};
dpnp_memory_free_c(output_shape);
dpnp_memory_free_c(output_shape_strides);
dpnp_memory_free_c(queue_ref, output_shape);
dpnp_memory_free_c(queue_ref, output_shape_strides);
output_shape = nullptr;
output_shape_strides = nullptr;
}
Expand All @@ -631,6 +646,8 @@ class DPNPC_id final
free_output_memory();
}

DPCTLSyclQueueRef queue_ref = nullptr; /**< reference to SYCL queue */

pointer data = nullptr; /**< input array begin pointer */
size_type input_size = size_type{}; /**< input array size */
size_type* input_shape = nullptr; /**< input array shape */
Expand Down
10 changes: 7 additions & 3 deletions dpnp/backend/tests/test_broadcast_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ TEST_P(IteratorBroadcasting, loop_broadcast)
const IteratorParameters& param = GetParam();
std::vector<data_type> input_data = get_input_data<data_type>(param.input_shape);

DPNPC_id<data_type> input(input_data.data(), param.input_shape);
DPCTLSyclQueueRef q_ref = reinterpret_cast<DPCTLSyclQueueRef>(&DPNP_QUEUE);

DPNPC_id<data_type> input(q_ref, input_data.data(), param.input_shape);
input.broadcast_to_shape(param.output_shape);

ASSERT_EQ(input.get_output_size(), param.result.size());
Expand All @@ -82,9 +84,11 @@ TEST_P(IteratorBroadcasting, sycl_broadcast)
std::vector<data_type> input_data = get_input_data<data_type>(param.input_shape);
data_type* shared_data = get_shared_data<data_type>(input_data);

DPCTLSyclQueueRef q_ref = reinterpret_cast<DPCTLSyclQueueRef>(&DPNP_QUEUE);

DPNPC_id<data_type>* input_it;
input_it = reinterpret_cast<DPNPC_id<data_type>*>(dpnp_memory_alloc_c(sizeof(DPNPC_id<data_type>)));
new (input_it) DPNPC_id<data_type>(shared_data, param.input_shape);
input_it = reinterpret_cast<DPNPC_id<data_type>*>(dpnp_memory_alloc_c(q_ref, sizeof(DPNPC_id<data_type>)));
new (input_it) DPNPC_id<data_type>(q_ref, shared_data, param.input_shape);

input_it->broadcast_to_shape(param.output_shape);

Expand Down
Loading