Skip to content

Commit ea718e3

Browse files
authored
Properly set properties of group_load/group_store to striped (#2238)
This PR passes the `striped` property to the `group_load` and `group_store` calls, which helps to fully replicate the legacy behavior of `sub_group::load<vec_sz>` and `sub_group::store<vec_sz>`.
1 parent 8f05542 commit ea718e3

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,11 @@
4141
using dpctl::tensor::kernels::alignment_utils::is_aligned;
4242
using dpctl::tensor::kernels::alignment_utils::required_alignment;
4343

44-
using sycl::ext::oneapi::experimental::group_load;
45-
using sycl::ext::oneapi::experimental::group_store;
44+
namespace syclex = sycl::ext::oneapi::experimental;
45+
using syclex::group_load;
46+
using syclex::group_store;
47+
48+
// Property list selecting striped data placement; it is passed as the last
// argument to the group_load/group_store calls in this file.
constexpr auto striped = syclex::properties{syclex::data_placement_striped};
4649

4750
template <typename T>
4851
constexpr T dispatch_erf_op(T elem)
@@ -529,8 +532,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
529532
sycl::vec<_DataType_input1, vec_sz> x1{}; \
530533
sycl::vec<_DataType_input2, vec_sz> x2{}; \
531534
\
532-
group_load(sg, input1_multi_ptr, x1); \
533-
group_load(sg, input2_multi_ptr, x2); \
535+
group_load(sg, input1_multi_ptr, x1, striped); \
536+
group_load(sg, input2_multi_ptr, x2, striped); \
534537
\
535538
res_vec = __vec_operation__; \
536539
} \
@@ -540,8 +543,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
540543
sycl::vec<_DataType_input1, vec_sz> tmp_x1{}; \
541544
sycl::vec<_DataType_input2, vec_sz> tmp_x2{}; \
542545
\
543-
group_load(sg, input1_multi_ptr, tmp_x1); \
544-
group_load(sg, input2_multi_ptr, tmp_x2); \
546+
group_load(sg, input1_multi_ptr, tmp_x1, \
547+
striped); \
548+
group_load(sg, input2_multi_ptr, tmp_x2, \
549+
striped); \
545550
\
546551
sycl::vec<_DataType_output, vec_sz> x1 = \
547552
dpnp_vec_cast<_DataType_output, \
@@ -559,16 +564,16 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
559564
sycl::vec<_DataType_input1, vec_sz> x1{}; \
560565
sycl::vec<_DataType_input2, vec_sz> x2{}; \
561566
\
562-
group_load(sg, input1_multi_ptr, x1); \
563-
group_load(sg, input2_multi_ptr, x2); \
567+
group_load(sg, input1_multi_ptr, x1, striped); \
568+
group_load(sg, input2_multi_ptr, x2, striped); \
564569
\
565570
for (size_t k = 0; k < vec_sz; ++k) { \
566571
const _DataType_output input1_elem = x1[k]; \
567572
const _DataType_output input2_elem = x2[k]; \
568573
res_vec[k] = __operation__; \
569574
} \
570575
} \
571-
group_store(sg, res_vec, result_multi_ptr); \
576+
group_store(sg, res_vec, result_multi_ptr, striped); \
572577
} \
573578
else { \
574579
for (size_t k = start + sg.get_local_id()[0]; \

0 commit comments

Comments
 (0)