Skip to content

Commit 962b00a

Browse files
authored
[SYCL] Pull oneDPL tuple to use in reduction implementation (#3481)
Using sycl::detail::tuple is a temporary work-around for various problems caused by using std::tuple: a) reduction using std::tuple cannot be compiled on Windows because std::tuple cannot be copied to DEVICE. b) internal error in level_zero RT. The new sycl::detail::tuple class is a very limited version of oneDPL's implementation of tuple. It includes such functionality: - convert from std::tuple and to std::tuple - tie(), get<I>(), tuple_element, make_tuple This change enables parallel_for() with number of reductions more than 1 for level_zero and for Windows. The corresponding changes in LIT tests: intel/llvm-test-suite#194 Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 529f74e commit 962b00a

File tree

2 files changed

+235
-17
lines changed

2 files changed

+235
-17
lines changed

sycl/include/CL/sycl/ONEAPI/reduction.hpp

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <CL/sycl/ONEAPI/group_algorithm.hpp>
1313
#include <CL/sycl/accessor.hpp>
1414
#include <CL/sycl/atomic.hpp>
15+
#include <CL/sycl/detail/tuple.hpp>
1516
#include <CL/sycl/handler.hpp>
1617
#include <CL/sycl/kernel.hpp>
1718

@@ -30,6 +31,15 @@ using cl::sycl::detail::is_sgeninteger;
3031
using cl::sycl::detail::queue_impl;
3132
using cl::sycl::detail::remove_AS;
3233

34+
// std::tuple seems to be a) too heavy and b) not copyable to device now
35+
// Thus sycl::detail::tuple is used instead.
36+
// Switching from sycl::device::tuple to std::tuple can be done by re-defining
37+
// the ReduTupleT type and makeReduTupleT() function below.
38+
template <typename... Ts> using ReduTupleT = sycl::detail::tuple<Ts...>;
39+
template <typename... Ts> ReduTupleT<Ts...> makeReduTupleT(Ts... Elements) {
40+
return sycl::detail::make_tuple(Elements...);
41+
}
42+
3343
__SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
3444
size_t LocalMemBytesPerWorkItem);
3545
__SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
@@ -1290,7 +1300,7 @@ reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) {
12901300
template <typename... Reductions, size_t... Is>
12911301
auto createReduLocalAccs(size_t Size, handler &CGH,
12921302
std::index_sequence<Is...>) {
1293-
return std::make_tuple(
1303+
return makeReduTupleT(
12941304
std::tuple_element_t<Is, std::tuple<Reductions...>>::getReadWriteLocalAcc(
12951305
Size, CGH)...);
12961306
}
@@ -1302,7 +1312,7 @@ template <bool IsOneWG, typename... Reductions, size_t... Is>
13021312
auto createReduOutAccs(size_t NWorkGroups, handler &CGH,
13031313
std::tuple<Reductions...> &ReduTuple,
13041314
std::index_sequence<Is...>) {
1305-
return std::make_tuple(
1315+
return makeReduTupleT(
13061316
std::get<Is>(ReduTuple).template getWriteMemForPartialReds<IsOneWG>(
13071317
NWorkGroups, CGH)...);
13081318
}
@@ -1314,19 +1324,19 @@ template <typename... Reductions, size_t... Is>
13141324
auto getReadAccsToPreviousPartialReds(handler &CGH,
13151325
std::tuple<Reductions...> &ReduTuple,
13161326
std::index_sequence<Is...>) {
1317-
return std::make_tuple(
1327+
return makeReduTupleT(
13181328
std::get<Is>(ReduTuple).getReadAccToPreviousPartialReds(CGH)...);
13191329
}
13201330

13211331
template <typename... Reductions, size_t... Is>
1322-
std::tuple<typename Reductions::result_type...>
1332+
ReduTupleT<typename Reductions::result_type...>
13231333
getReduIdentities(std::tuple<Reductions...> &ReduTuple,
13241334
std::index_sequence<Is...>) {
13251335
return {std::get<Is>(ReduTuple).getIdentity()...};
13261336
}
13271337

13281338
template <typename... Reductions, size_t... Is>
1329-
std::tuple<typename Reductions::binary_operation...>
1339+
ReduTupleT<typename Reductions::binary_operation...>
13301340
getReduBOPs(std::tuple<Reductions...> &ReduTuple, std::index_sequence<Is...>) {
13311341
return {std::get<Is>(ReduTuple).getBinaryOperation()...};
13321342
}
@@ -1340,8 +1350,8 @@ getInitToIdentityProperties(std::tuple<Reductions...> &ReduTuple,
13401350

13411351
template <typename... Reductions, size_t... Is>
13421352
std::tuple<typename Reductions::reducer_type...>
1343-
createReducers(std::tuple<typename Reductions::result_type...> Identities,
1344-
std::tuple<typename Reductions::binary_operation...> BOPsTuple,
1353+
createReducers(ReduTupleT<typename Reductions::result_type...> Identities,
1354+
ReduTupleT<typename Reductions::binary_operation...> BOPsTuple,
13451355
std::index_sequence<Is...>) {
13461356
return {typename Reductions::reducer_type{std::get<Is>(Identities),
13471357
std::get<Is>(BOPsTuple)}...};
@@ -1357,9 +1367,9 @@ void callReduUserKernelFunc(KernelType KernelFunc, nd_item<Dims> NDIt,
13571367
template <bool Pow2WG, typename... LocalAccT, typename... ReducerT,
13581368
typename... ResultT, size_t... Is>
13591369
void initReduLocalAccs(size_t LID, size_t WGSize,
1360-
std::tuple<LocalAccT...> LocalAccs,
1370+
ReduTupleT<LocalAccT...> LocalAccs,
13611371
const std::tuple<ReducerT...> &Reducers,
1362-
const std::tuple<ResultT...> Identities,
1372+
ReduTupleT<ResultT...> Identities,
13631373
std::index_sequence<Is...>) {
13641374
std::tie(std::get<Is>(LocalAccs)[LID]...) =
13651375
std::make_tuple(std::get<Is>(Reducers).MValue...);
@@ -1375,9 +1385,9 @@ void initReduLocalAccs(size_t LID, size_t WGSize,
13751385
template <bool UniformPow2WG, typename... LocalAccT, typename... InputAccT,
13761386
typename... ResultT, size_t... Is>
13771387
void initReduLocalAccs(size_t LID, size_t GID, size_t NWorkItems, size_t WGSize,
1378-
std::tuple<InputAccT...> LocalAccs,
1379-
std::tuple<LocalAccT...> InputAccs,
1380-
const std::tuple<ResultT...> Identities,
1388+
ReduTupleT<InputAccT...> LocalAccs,
1389+
ReduTupleT<LocalAccT...> InputAccs,
1390+
ReduTupleT<ResultT...> Identities,
13811391
std::index_sequence<Is...>) {
13821392
// Normally, the local accessors are initialized with elements from the input
13831393
// accessors. The exception is the case when (GID >= NWorkItems), which
@@ -1402,8 +1412,8 @@ void initReduLocalAccs(size_t LID, size_t GID, size_t NWorkItems, size_t WGSize,
14021412

14031413
template <typename... LocalAccT, typename... BOPsT, size_t... Is>
14041414
void reduceReduLocalAccs(size_t IndexA, size_t IndexB,
1405-
std::tuple<LocalAccT...> LocalAccs,
1406-
std::tuple<BOPsT...> BOPs,
1415+
ReduTupleT<LocalAccT...> LocalAccs,
1416+
ReduTupleT<BOPsT...> BOPs,
14071417
std::index_sequence<Is...>) {
14081418
std::tie(std::get<Is>(LocalAccs)[IndexA]...) =
14091419
std::make_tuple((std::get<Is>(BOPs)(std::get<Is>(LocalAccs)[IndexA],
@@ -1415,8 +1425,8 @@ template <bool Pow2WG, bool IsOneWG, typename... Reductions,
14151425
typename... Ts, size_t... Is>
14161426
void writeReduSumsToOutAccs(
14171427
size_t OutAccIndex, size_t WGSize, std::tuple<Reductions...> *,
1418-
std::tuple<OutAccT...> OutAccs, std::tuple<LocalAccT...> LocalAccs,
1419-
std::tuple<BOPsT...> BOPs, std::tuple<Ts...> IdentityVals,
1428+
ReduTupleT<OutAccT...> OutAccs, ReduTupleT<LocalAccT...> LocalAccs,
1429+
ReduTupleT<BOPsT...> BOPs, ReduTupleT<Ts...> IdentityVals,
14201430
std::array<bool, sizeof...(Reductions)> IsInitializeToIdentity,
14211431
std::index_sequence<Is...>) {
14221432
// Add the initial value of user's variable to the final result.
@@ -1528,9 +1538,9 @@ void reduCGFuncImpl(handler &CGH, KernelType KernelFunc,
15281538
auto OutAccsTuple =
15291539
createReduOutAccs<IsOneWG>(NWorkGroups, CGH, ReduTuple, ReduIndices);
15301540
auto IdentitiesTuple = getReduIdentities(ReduTuple, ReduIndices);
1541+
auto BOPsTuple = getReduBOPs(ReduTuple, ReduIndices);
15311542
auto InitToIdentityProps =
15321543
getInitToIdentityProperties(ReduTuple, ReduIndices);
1533-
auto BOPsTuple = getReduBOPs(ReduTuple, ReduIndices);
15341544

15351545
using Name = typename get_reduction_main_kernel_name_t<
15361546
KernelName, KernelType, Pow2WG, IsOneWG, decltype(OutAccsTuple)>::name;

sycl/include/CL/sycl/detail/tuple.hpp

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
//== tuple.hpp - limited trivially copy constructible implementation- C++ --==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// ===--------------------------------------------------------------------=== //
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/defines_elementary.hpp>
12+
13+
#include <cassert>
14+
#include <iterator>
15+
#include <tuple>
16+
#include <type_traits>
17+
18+
__SYCL_INLINE_NAMESPACE(cl) {
19+
namespace sycl {
20+
namespace detail {
21+
22+
template <typename... T> struct tuple;
23+
24+
template <typename T, typename... Ts, std::size_t... Is>
25+
std::tuple<Ts...> get_tuple_tail_impl(const std::tuple<T, Ts...> &Tuple,
26+
const std::index_sequence<Is...> &) {
27+
return std::tuple<Ts...>(std::get<Is + 1>(Tuple)...);
28+
}
29+
30+
template <typename T, typename... Ts>
31+
std::tuple<Ts...> get_tuple_tail(const std::tuple<T, Ts...> &Tuple) {
32+
return get_tuple_tail_impl(Tuple, std::make_index_sequence<sizeof...(Ts)>());
33+
}
34+
35+
template <typename... Ts> constexpr tuple<Ts...> make_tuple(Ts... Args) {
36+
return sycl::detail::tuple<Ts...>{Args...};
37+
}
38+
39+
template <typename... Ts> auto tie(Ts &... Args) {
40+
return sycl::detail::tuple<Ts &...>(Args...);
41+
}
42+
43+
template <std::size_t N, typename T> struct tuple_element;
44+
45+
template <std::size_t N, typename T, typename... Rest>
46+
struct tuple_element<N, tuple<T, Rest...>>
47+
: tuple_element<N - 1, tuple<Rest...>> {};
48+
49+
template <typename T, typename... Rest>
50+
struct tuple_element<0, tuple<T, Rest...>> {
51+
using type = T;
52+
};
53+
54+
template <std::size_t I, class T>
55+
using tuple_element_t = typename tuple_element<I, T>::type;
56+
57+
// Functor returning reference to the selected element of the tuple.
58+
template <size_t N> struct get {
59+
template <typename... Ts>
60+
constexpr auto operator()(tuple<Ts...> &Tuple) const
61+
-> decltype(get<N - 1>()(Tuple.next)) {
62+
return get<N - 1>()(Tuple.next);
63+
}
64+
65+
template <typename... Ts>
66+
constexpr auto operator()(const tuple<Ts...> &Tuple) const
67+
-> decltype(get<N - 1>()(Tuple.next)) {
68+
return get<N - 1>()(Tuple.next);
69+
}
70+
};
71+
72+
// Functor returning reference to selected element of the tuple.
73+
// Specialization stopping the recursion.
74+
template <> struct get<0> {
75+
template <typename... Ts>
76+
using ret_type = typename tuple_element<0, tuple<Ts...>>::type;
77+
78+
template <typename... Ts>
79+
constexpr ret_type<Ts...> &operator()(tuple<Ts...> &Tuple) const noexcept {
80+
return Tuple.holder.value;
81+
}
82+
83+
template <typename... Ts>
84+
constexpr ret_type<Ts...> const &operator()(const tuple<Ts...> &Tuple) const
85+
noexcept {
86+
return Tuple.holder.value;
87+
}
88+
};
89+
90+
template <typename T> struct TupleValueHolder {
91+
TupleValueHolder() = default;
92+
TupleValueHolder(const T &Value) : value(Value) {}
93+
T value;
94+
};
95+
96+
// Tuple needs to be trivially_copy_assignable. Define operator= if necessary.
97+
template <typename T,
98+
bool = std::is_trivially_copy_assignable<TupleValueHolder<T>>::value>
99+
struct TupleCopyAssignableValueHolder : TupleValueHolder<T> {
100+
using TupleValueHolder<T>::TupleValueHolder;
101+
};
102+
103+
template <typename T>
104+
struct TupleCopyAssignableValueHolder<T, false> : TupleValueHolder<T> {
105+
using TupleValueHolder<T>::TupleValueHolder;
106+
107+
TupleCopyAssignableValueHolder &
108+
operator=(const TupleCopyAssignableValueHolder &RHS) {
109+
this->value = RHS.value;
110+
return *this;
111+
}
112+
};
113+
114+
template <typename T, typename... Ts> struct tuple<T, Ts...> {
115+
TupleCopyAssignableValueHolder<T> holder;
116+
tuple<Ts...> next;
117+
118+
using tuple_type = std::tuple<T, Ts...>;
119+
120+
tuple() = default;
121+
tuple(const tuple &) = default;
122+
template <typename UT, typename... UTs>
123+
tuple(const tuple<UT, UTs...> &RHS)
124+
: holder(RHS.holder.value), next(RHS.next) {}
125+
126+
tuple(const T &Value, const Ts &... Next) : holder(Value), next(Next...) {}
127+
128+
// required to convert std::tuple to inner tuple in user-provided functor
129+
tuple(const std::tuple<T, Ts...> &RHS)
130+
: holder(std::get<0>(RHS)), next(get_tuple_tail(RHS)) {}
131+
132+
// Convert to std::tuple with the same template arguments.
133+
operator std::tuple<T, Ts...>() const {
134+
return to_std_tuple(*this, std::make_index_sequence<sizeof...(Ts) + 1>());
135+
}
136+
137+
// Convert to std::tuple with different template arguments.
138+
template <typename UT, typename... UTs>
139+
operator std::tuple<UT, UTs...>() const {
140+
return to_std_tuple(static_cast<tuple<UT, UTs...>>(*this),
141+
std::make_index_sequence<sizeof...(Ts) + 1>());
142+
}
143+
144+
template <typename UT, typename... UTs>
145+
tuple &operator=(const detail::tuple<UT, UTs...> &RHS) {
146+
holder.value = RHS.holder.value;
147+
next = RHS.next;
148+
return *this;
149+
}
150+
151+
// if T is deduced with reference, compiler generates deleted operator= and,
152+
// since "template operator=" is not considered as operator= overload
153+
// the deleted operator= has a preference during lookup
154+
tuple &operator=(const detail::tuple<T, Ts...> &) = default;
155+
156+
// Convert std::tuple to sycl::detail::tuple
157+
template <typename UT, typename... UTs>
158+
tuple &operator=(const std::tuple<UT, UTs...> &RHS) {
159+
holder.value = std::get<0>(RHS);
160+
next = get_tuple_tail(RHS);
161+
return *this;
162+
}
163+
164+
friend bool operator==(const tuple &LHS, const tuple &RHS) {
165+
return LHS.holder.value == RHS.holder.value && LHS.next == RHS.next;
166+
}
167+
friend bool operator!=(const tuple &LHS, const tuple &RHS) {
168+
return !(LHS == RHS);
169+
}
170+
171+
template <typename UT, typename... UTs, std::size_t... Is>
172+
static std::tuple<UT, UTs...> to_std_tuple(const tuple<UT, UTs...> &Tuple,
173+
std::index_sequence<Is...>) {
174+
return std::tuple<UT, UTs...>(get<Is>()(Tuple)...);
175+
}
176+
};
177+
178+
template <> struct tuple<> {
179+
using tuple_type = std::tuple<>;
180+
181+
tuple() = default;
182+
tuple(const tuple &) = default;
183+
tuple(const std::tuple<> &) {}
184+
185+
tuple &operator=(const tuple &) = default;
186+
tuple &operator=(const std::tuple<> &) { return *this; }
187+
friend bool operator==(const tuple &, const tuple &) { return true; }
188+
};
189+
190+
} // namespace detail
191+
} // namespace sycl
192+
} // __SYCL_INLINE_NAMESPACE(cl)
193+
194+
namespace std {
195+
196+
template <size_t I, typename... Types>
197+
constexpr typename tuple_element<I, tuple<Types...>>::type &
198+
get(cl::sycl::detail::tuple<Types...> &Arg) noexcept {
199+
return cl::sycl::detail::get<I>()(Arg);
200+
}
201+
202+
template <size_t I, typename... Types>
203+
constexpr typename tuple_element<I, tuple<Types...>>::type const &
204+
get(const cl::sycl::detail::tuple<Types...> &Arg) noexcept {
205+
return cl::sycl::detail::get<I>()(Arg);
206+
}
207+
208+
} // namespace std

0 commit comments

Comments
 (0)