Skip to content

Commit 02d46b4

Browse files
Do not use sg.get_local_range
Use sg.get_max_local_range instead. The `sg.get_local_range` must perform lots of checks to determine if this is the last trailing sub-group in the work-group and its actual size may be smaller. We set the local work-group size to be 128, which is a multiple of any sub-group size, and hence get_local_range() always equals to get_max_local_raneg(). The size of the work-groups was increated from 128 to 256, which is chosen so that all 8 threads of single vector with simd32 are used. Set vec_sz and n_vecs in implementations of contig_impl for each support function Make local work-groups size dependent on number of elements to process Fixes for type dispatching utils 1. Add missing include <type_traits> needed for std::true_type, and std::disjunction, std::conjunction 2. Replace std::bool_constant<std::same_v<T1, T2>> with direct and simpler std::same<T1, T2> in couple of instances Hide hyperparameter selection struct in anonymous namespace
1 parent 4d8d7ff commit 02d46b4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+2534
-558
lines changed

dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@
3232
#include <type_traits>
3333

3434
#include "cabs_impl.hpp"
35-
#include "kernels/elementwise_functions/common.hpp"
35+
#include "vec_size_util.hpp"
3636

3737
#include "kernels/dpctl_tensor_types.hpp"
38+
#include "kernels/elementwise_functions/common.hpp"
39+
3840
#include "utils/offset_utils.hpp"
3941
#include "utils/type_dispatch_building.hpp"
4042
#include "utils/type_utils.hpp"
@@ -89,8 +91,8 @@ template <typename argT, typename resT> struct AbsFunctor
8991

9092
template <typename argT,
9193
typename resT = argT,
92-
unsigned int vec_sz = 4,
93-
unsigned int n_vecs = 2,
94+
unsigned int vec_sz = 4u,
95+
unsigned int n_vecs = 2u,
9496
bool enable_sg_loadstore = true>
9597
using AbsContigFunctor =
9698
elementwise_common::UnaryContigFunctor<argT,
@@ -122,6 +124,24 @@ template <typename T> struct AbsOutputType
122124
static constexpr bool is_defined = !std::is_same_v<value_type, void>;
123125
};
124126

127+
namespace
128+
{
129+
130+
namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
131+
132+
using vsu_ns::ContigHyperparameterSetDefault;
133+
134+
template <typename argTy> struct AbsContigHyperparameterSet
135+
{
136+
using value_type =
137+
typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
138+
139+
constexpr static auto vec_sz = value_type::vec_sz;
140+
constexpr static auto n_vecs = value_type::n_vecs;
141+
};
142+
143+
} // namespace
144+
125145
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
126146
class abs_contig_kernel;
127147

@@ -132,9 +152,12 @@ sycl::event abs_contig_impl(sycl::queue &exec_q,
132152
char *res_p,
133153
const std::vector<sycl::event> &depends = {})
134154
{
155+
constexpr unsigned int vec_sz = AbsContigHyperparameterSet<argTy>::vec_sz;
156+
constexpr unsigned int n_vec = AbsContigHyperparameterSet<argTy>::n_vecs;
157+
135158
return elementwise_common::unary_contig_impl<
136-
argTy, AbsOutputType, AbsContigFunctor, abs_contig_kernel>(
137-
exec_q, nelems, arg_p, res_p, depends);
159+
argTy, AbsOutputType, AbsContigFunctor, abs_contig_kernel, vec_sz,
160+
n_vec>(exec_q, nelems, arg_p, res_p, depends);
138161
}
139162

140163
template <typename fnT, typename T> struct AbsContigFactory

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@
2929
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

32-
#include "kernels/elementwise_functions/common.hpp"
3332
#include "sycl_complex.hpp"
33+
#include "vec_size_util.hpp"
3434

3535
#include "kernels/dpctl_tensor_types.hpp"
36+
#include "kernels/elementwise_functions/common.hpp"
37+
3638
#include "utils/offset_utils.hpp"
3739
#include "utils/type_dispatch_building.hpp"
3840
#include "utils/type_utils.hpp"
@@ -128,8 +130,8 @@ template <typename argT, typename resT> struct AcosFunctor
128130

129131
template <typename argTy,
130132
typename resTy = argTy,
131-
unsigned int vec_sz = 4,
132-
unsigned int n_vecs = 2,
133+
unsigned int vec_sz = 4u,
134+
unsigned int n_vecs = 2u,
133135
bool enable_sg_loadstore = true>
134136
using AcosContigFunctor =
135137
elementwise_common::UnaryContigFunctor<argTy,
@@ -156,6 +158,24 @@ template <typename T> struct AcosOutputType
156158
static constexpr bool is_defined = !std::is_same_v<value_type, void>;
157159
};
158160

161+
namespace
162+
{
163+
164+
namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
165+
166+
using vsu_ns::ContigHyperparameterSetDefault;
167+
168+
template <typename argTy> struct AcosContigHyperparameterSet
169+
{
170+
using value_type =
171+
typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
172+
173+
constexpr static auto vec_sz = value_type::vec_sz;
174+
constexpr static auto n_vecs = value_type::n_vecs;
175+
};
176+
177+
} // namespace
178+
159179
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
160180
class acos_contig_kernel;
161181

@@ -166,9 +186,12 @@ sycl::event acos_contig_impl(sycl::queue &exec_q,
166186
char *res_p,
167187
const std::vector<sycl::event> &depends = {})
168188
{
189+
constexpr unsigned int vec_sz = AcosContigHyperparameterSet<argTy>::vec_sz;
190+
constexpr unsigned int n_vec = AcosContigHyperparameterSet<argTy>::n_vecs;
191+
169192
return elementwise_common::unary_contig_impl<
170-
argTy, AcosOutputType, AcosContigFunctor, acos_contig_kernel>(
171-
exec_q, nelems, arg_p, res_p, depends);
193+
argTy, AcosOutputType, AcosContigFunctor, acos_contig_kernel, vec_sz,
194+
n_vec>(exec_q, nelems, arg_p, res_p, depends);
172195
}
173196

174197
template <typename fnT, typename T> struct AcosContigFactory

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@
2929
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

32-
#include "kernels/elementwise_functions/common.hpp"
3332
#include "sycl_complex.hpp"
33+
#include "vec_size_util.hpp"
3434

3535
#include "kernels/dpctl_tensor_types.hpp"
36+
#include "kernels/elementwise_functions/common.hpp"
37+
3638
#include "utils/offset_utils.hpp"
3739
#include "utils/type_dispatch_building.hpp"
3840
#include "utils/type_utils.hpp"
@@ -155,8 +157,8 @@ template <typename argT, typename resT> struct AcoshFunctor
155157

156158
template <typename argTy,
157159
typename resTy = argTy,
158-
unsigned int vec_sz = 4,
159-
unsigned int n_vecs = 2,
160+
unsigned int vec_sz = 4u,
161+
unsigned int n_vecs = 2u,
160162
bool enable_sg_loadstore = true>
161163
using AcoshContigFunctor =
162164
elementwise_common::UnaryContigFunctor<argTy,
@@ -183,6 +185,25 @@ template <typename T> struct AcoshOutputType
183185
static constexpr bool is_defined = !std::is_same_v<value_type, void>;
184186
};
185187

188+
namespace
189+
{
190+
191+
namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
192+
193+
using vsu_ns::ContigHyperparameterSetDefault;
194+
using vsu_ns::UnaryContigHyperparameterSetEntry;
195+
196+
template <typename argTy> struct AcoshContigHyperparameterSet
197+
{
198+
using value_type =
199+
typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
200+
201+
constexpr static auto vec_sz = value_type::vec_sz;
202+
constexpr static auto n_vecs = value_type::n_vecs;
203+
};
204+
205+
} // namespace
206+
186207
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
187208
class acosh_contig_kernel;
188209

@@ -193,9 +214,12 @@ sycl::event acosh_contig_impl(sycl::queue &exec_q,
193214
char *res_p,
194215
const std::vector<sycl::event> &depends = {})
195216
{
217+
constexpr unsigned int vec_sz = AcoshContigHyperparameterSet<argTy>::vec_sz;
218+
constexpr unsigned int n_vec = AcoshContigHyperparameterSet<argTy>::n_vecs;
219+
196220
return elementwise_common::unary_contig_impl<
197-
argTy, AcoshOutputType, AcoshContigFunctor, acosh_contig_kernel>(
198-
exec_q, nelems, arg_p, res_p, depends);
221+
argTy, AcoshOutputType, AcoshContigFunctor, acosh_contig_kernel, vec_sz,
222+
n_vec>(exec_q, nelems, arg_p, res_p, depends);
199223
}
200224

201225
template <typename fnT, typename T> struct AcoshContigFactory

dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include <type_traits>
3131

3232
#include "sycl_complex.hpp"
33+
#include "vec_size_util.hpp"
34+
3335
#include "utils/offset_utils.hpp"
3436
#include "utils/type_dispatch_building.hpp"
3537
#include "utils/type_utils.hpp"
@@ -110,8 +112,8 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor
110112
template <typename argT1,
111113
typename argT2,
112114
typename resT,
113-
unsigned int vec_sz = 4,
114-
unsigned int n_vecs = 2,
115+
unsigned int vec_sz = 4u,
116+
unsigned int n_vecs = 2u,
115117
bool enable_sg_loadstore = true>
116118
using AddContigFunctor =
117119
elementwise_common::BinaryContigFunctor<argT1,
@@ -196,6 +198,43 @@ template <typename T1, typename T2> struct AddOutputType
196198
static constexpr bool is_defined = !std::is_same_v<value_type, void>;
197199
};
198200

201+
namespace
202+
{
203+
204+
namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
205+
206+
using vsu_ns::BinaryContigHyperparameterSetEntry;
207+
using vsu_ns::ContigHyperparameterSetDefault;
208+
209+
template <typename argTy1, typename argTy2> struct AddContigHyperparameterSet
210+
{
211+
using value_type = typename std::disjunction<
212+
BinaryContigHyperparameterSetEntry<argTy1,
213+
std::int64_t,
214+
argTy2,
215+
std::int64_t,
216+
1u,
217+
2u>,
218+
BinaryContigHyperparameterSetEntry<argTy1,
219+
std::uint64_t,
220+
argTy2,
221+
std::uint64_t,
222+
1u,
223+
2u>,
224+
BinaryContigHyperparameterSetEntry<argTy1,
225+
double,
226+
argTy2,
227+
double,
228+
1u,
229+
2u>,
230+
ContigHyperparameterSetDefault<4u, 2u>>;
231+
232+
constexpr static auto vec_sz = value_type::vec_sz;
233+
constexpr static auto n_vecs = value_type::n_vecs;
234+
};
235+
236+
} // end of anonymous namespace
237+
199238
template <typename argT1,
200239
typename argT2,
201240
typename resT,
@@ -214,10 +253,13 @@ sycl::event add_contig_impl(sycl::queue &exec_q,
214253
ssize_t res_offset,
215254
const std::vector<sycl::event> &depends = {})
216255
{
256+
constexpr auto vec_sz = AddContigHyperparameterSet<argTy1, argTy2>::vec_sz;
257+
constexpr auto n_vecs = AddContigHyperparameterSet<argTy1, argTy2>::n_vecs;
258+
217259
return elementwise_common::binary_contig_impl<
218-
argTy1, argTy2, AddOutputType, AddContigFunctor, add_contig_kernel>(
219-
exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p,
220-
res_offset, depends);
260+
argTy1, argTy2, AddOutputType, AddContigFunctor, add_contig_kernel,
261+
vec_sz, n_vecs>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
262+
arg2_offset, res_p, res_offset, depends);
221263
}
222264

223265
template <typename fnT, typename T1, typename T2> struct AddContigFactory
@@ -410,8 +452,8 @@ template <typename argT, typename resT> struct AddInplaceFunctor
410452

411453
template <typename argT,
412454
typename resT,
413-
unsigned int vec_sz = 4,
414-
unsigned int n_vecs = 2,
455+
unsigned int vec_sz = 4u,
456+
unsigned int n_vecs = 2u,
415457
bool enable_sg_loadstore = true>
416458
using AddInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor<
417459
argT,
@@ -489,9 +531,13 @@ add_inplace_contig_impl(sycl::queue &exec_q,
489531
ssize_t res_offset,
490532
const std::vector<sycl::event> &depends = {})
491533
{
534+
constexpr auto vec_sz = AddContigHyperparameterSet<resTy, argTy>::vec_sz;
535+
constexpr auto n_vecs = AddContigHyperparameterSet<resTy, argTy>::n_vecs;
536+
492537
return elementwise_common::binary_inplace_contig_impl<
493-
argTy, resTy, AddInplaceContigFunctor, add_inplace_contig_kernel>(
494-
exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends);
538+
argTy, resTy, AddInplaceContigFunctor, add_inplace_contig_kernel,
539+
vec_sz, n_vecs>(exec_q, nelems, arg_p, arg_offset, res_p, res_offset,
540+
depends);
495541
}
496542

497543
template <typename fnT, typename T1, typename T2> struct AddInplaceContigFactory

dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,12 @@
3030
#include <sycl/sycl.hpp>
3131
#include <type_traits>
3232

33-
#include "kernels/elementwise_functions/common.hpp"
3433
#include "sycl_complex.hpp"
34+
#include "vec_size_util.hpp"
3535

3636
#include "kernels/dpctl_tensor_types.hpp"
37+
#include "kernels/elementwise_functions/common.hpp"
38+
3739
#include "utils/offset_utils.hpp"
3840
#include "utils/type_dispatch_building.hpp"
3941
#include "utils/type_utils.hpp"
@@ -74,8 +76,8 @@ template <typename argT, typename resT> struct AngleFunctor
7476

7577
template <typename argTy,
7678
typename resTy = argTy,
77-
unsigned int vec_sz = 4,
78-
unsigned int n_vecs = 2,
79+
unsigned int vec_sz = 4u,
80+
unsigned int n_vecs = 2u,
7981
bool enable_sg_loadstore = true>
8082
using AngleContigFunctor =
8183
elementwise_common::UnaryContigFunctor<argTy,
@@ -99,6 +101,25 @@ template <typename T> struct AngleOutputType
99101
static constexpr bool is_defined = !std::is_same_v<value_type, void>;
100102
};
101103

104+
namespace
105+
{
106+
107+
namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
108+
109+
using vsu_ns::ContigHyperparameterSetDefault;
110+
using vsu_ns::UnaryContigHyperparameterSetEntry;
111+
112+
template <typename argTy> struct AngleContigHyperparameterSet
113+
{
114+
using value_type =
115+
typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
116+
117+
constexpr static auto vec_sz = value_type::vec_sz;
118+
constexpr static auto n_vecs = value_type::n_vecs;
119+
};
120+
121+
} // end of anonymous namespace
122+
102123
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
103124
class angle_contig_kernel;
104125

@@ -109,9 +130,12 @@ sycl::event angle_contig_impl(sycl::queue &exec_q,
109130
char *res_p,
110131
const std::vector<sycl::event> &depends = {})
111132
{
133+
constexpr unsigned int vec_sz = AngleContigHyperparameterSet<argTy>::vec_sz;
134+
constexpr unsigned int n_vec = AngleContigHyperparameterSet<argTy>::n_vecs;
135+
112136
return elementwise_common::unary_contig_impl<
113-
argTy, AngleOutputType, AngleContigFunctor, angle_contig_kernel>(
114-
exec_q, nelems, arg_p, res_p, depends);
137+
argTy, AngleOutputType, AngleContigFunctor, angle_contig_kernel, vec_sz,
138+
n_vec>(exec_q, nelems, arg_p, res_p, depends);
115139
}
116140

117141
template <typename fnT, typename T> struct AngleContigFactory

0 commit comments

Comments
 (0)