12
12
#include < CL/sycl/ONEAPI/group_algorithm.hpp>
13
13
#include < CL/sycl/accessor.hpp>
14
14
#include < CL/sycl/atomic.hpp>
15
+ #include < CL/sycl/detail/tuple.hpp>
15
16
#include < CL/sycl/handler.hpp>
16
17
#include < CL/sycl/kernel.hpp>
17
18
@@ -30,6 +31,15 @@ using cl::sycl::detail::is_sgeninteger;
30
31
using cl::sycl::detail::queue_impl;
31
32
using cl::sycl::detail::remove_AS;
32
33
34
+ // std::tuple seems to be a) too heavy and b) not copyable to device now
35
+ // Thus sycl::detail::tuple is used instead.
36
+ // Switching from sycl::detail::tuple to std::tuple can be done by re-defining
37
+ // the ReduTupleT type and makeReduTupleT() function below.
38
// ReduTupleT<Ts...> is an alias for sycl::detail::tuple<Ts...>; it is the
// single place that names the tuple implementation used by the helpers below.
+ template <typename ... Ts> using ReduTupleT = sycl::detail::tuple<Ts...>;
39
// Factory counterpart of ReduTupleT: takes the elements by value and passes
// them to sycl::detail::make_tuple, returning the result as ReduTupleT<Ts...>.
+ template <typename ... Ts> ReduTupleT<Ts...> makeReduTupleT (Ts... Elements) {
40
+ return sycl::detail::make_tuple (Elements...);
41
+ }
42
+
33
43
__SYCL_EXPORT size_t reduGetMaxWGSize (shared_ptr_class<queue_impl> Queue,
34
44
size_t LocalMemBytesPerWorkItem);
35
45
__SYCL_EXPORT size_t reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize,
@@ -1290,7 +1300,7 @@ reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) {
1290
1300
// Builds one tuple element per reduction type in Reductions...: for each index
// in Is..., getReadWriteLocalAcc(Size, CGH) is invoked through the
// corresponding reduction type (no object is involved) and the results are
// packed in pack order. The return type is deduced from the tuple factory.
template <typename ... Reductions, size_t ... Is>
1291
1301
auto createReduLocalAccs (size_t Size, handler &CGH,
1292
1302
std::index_sequence<Is...>) {
1293
// The removed line packed the results with std::make_tuple; the added line
// packs them with makeReduTupleT instead.
- return std::make_tuple (
1303
+ return makeReduTupleT (
1294
1304
std::tuple_element_t <Is, std::tuple<Reductions...>>::getReadWriteLocalAcc (
1295
1305
Size, CGH)...);
1296
1306
}
@@ -1302,7 +1312,7 @@ template <bool IsOneWG, typename... Reductions, size_t... Is>
1302
1312
// For every element of ReduTuple (selected by Is...), calls
// getWriteMemForPartialReds<IsOneWG>(NWorkGroups, CGH) on it and packs the
// returned values, in index order, into a single tuple that is returned.
auto createReduOutAccs (size_t NWorkGroups, handler &CGH,
1303
1313
std::tuple<Reductions...> &ReduTuple,
1304
1314
std::index_sequence<Is...>) {
1305
// The removed line packed the results with std::make_tuple; the added line
// packs them with makeReduTupleT instead.
- return std::make_tuple (
1315
+ return makeReduTupleT (
1306
1316
std::get<Is>(ReduTuple).template getWriteMemForPartialReds <IsOneWG>(
1307
1317
NWorkGroups, CGH)...);
1308
1318
}
@@ -1314,19 +1324,19 @@ template <typename... Reductions, size_t... Is>
1314
1324
// For every element of ReduTuple (selected by Is...), calls
// getReadAccToPreviousPartialReds(CGH) on it and returns the results packed,
// in index order, into a single tuple.
auto getReadAccsToPreviousPartialReds (handler &CGH,
1315
1325
std::tuple<Reductions...> &ReduTuple,
1316
1326
std::index_sequence<Is...>) {
1317
// The removed line packed the results with std::make_tuple; the added line
// packs them with makeReduTupleT instead.
- return std::make_tuple (
1327
+ return makeReduTupleT (
1318
1328
std::get<Is>(ReduTuple).getReadAccToPreviousPartialReds (CGH)...);
1319
1329
}
1320
1330
1321
1331
// Collects getIdentity() from every element of ReduTuple (selected by Is...)
// and returns the values, in index order, as a tuple whose element types are
// the reductions' result_type. The removed line declared the return type as
// std::tuple; the added line declares it as ReduTupleT.
template <typename ... Reductions, size_t ... Is>
1322
- std::tuple <typename Reductions::result_type...>
1332
+ ReduTupleT <typename Reductions::result_type...>
1323
1333
getReduIdentities (std::tuple<Reductions...> &ReduTuple,
1324
1334
std::index_sequence<Is...>) {
1325
1335
// Brace-initializes the declared return type from the expanded pack.
return {std::get<Is>(ReduTuple).getIdentity ()...};
1326
1336
}
1327
1337
1328
1338
// Collects getBinaryOperation() from every element of ReduTuple (selected by
// Is...) and returns the values, in index order, as a tuple whose element
// types are the reductions' binary_operation. The removed line declared the
// return type as std::tuple; the added line declares it as ReduTupleT.
template <typename ... Reductions, size_t ... Is>
1329
- std::tuple <typename Reductions::binary_operation...>
1339
+ ReduTupleT <typename Reductions::binary_operation...>
1330
1340
getReduBOPs (std::tuple<Reductions...> &ReduTuple, std::index_sequence<Is...>) {
1331
1341
// Brace-initializes the declared return type from the expanded pack.
return {std::get<Is>(ReduTuple).getBinaryOperation ()...};
1332
1342
}
@@ -1340,8 +1350,8 @@ getInitToIdentityProperties(std::tuple<Reductions...> &ReduTuple,
1340
1350
1341
1351
template <typename ... Reductions, size_t ... Is>
1342
1352
std::tuple<typename Reductions::reducer_type...>
1343
- createReducers (std::tuple <typename Reductions::result_type...> Identities,
1344
- std::tuple <typename Reductions::binary_operation...> BOPsTuple,
1353
+ createReducers (ReduTupleT <typename Reductions::result_type...> Identities,
1354
+ ReduTupleT <typename Reductions::binary_operation...> BOPsTuple,
1345
1355
std::index_sequence<Is...>) {
1346
1356
return {typename Reductions::reducer_type{std::get<Is>(Identities),
1347
1357
std::get<Is>(BOPsTuple)}...};
@@ -1357,9 +1367,9 @@ void callReduUserKernelFunc(KernelType KernelFunc, nd_item<Dims> NDIt,
1357
1367
template <bool Pow2WG, typename ... LocalAccT, typename ... ReducerT,
1358
1368
typename ... ResultT, size_t ... Is>
1359
1369
void initReduLocalAccs (size_t LID, size_t WGSize,
1360
- std::tuple <LocalAccT...> LocalAccs,
1370
+ ReduTupleT <LocalAccT...> LocalAccs,
1361
1371
const std::tuple<ReducerT...> &Reducers,
1362
- const std::tuple <ResultT...> Identities,
1372
+ ReduTupleT <ResultT...> Identities,
1363
1373
std::index_sequence<Is...>) {
1364
1374
std::tie (std::get<Is>(LocalAccs)[LID]...) =
1365
1375
std::make_tuple (std::get<Is>(Reducers).MValue ...);
@@ -1375,9 +1385,9 @@ void initReduLocalAccs(size_t LID, size_t WGSize,
1375
1385
template <bool UniformPow2WG, typename ... LocalAccT, typename ... InputAccT,
1376
1386
typename ... ResultT, size_t ... Is>
1377
1387
void initReduLocalAccs (size_t LID, size_t GID, size_t NWorkItems, size_t WGSize,
1378
- std::tuple <InputAccT...> LocalAccs,
1379
- std::tuple <LocalAccT...> InputAccs,
1380
- const std::tuple <ResultT...> Identities,
1388
+ ReduTupleT <InputAccT...> LocalAccs,
1389
+ ReduTupleT <LocalAccT...> InputAccs,
1390
+ ReduTupleT <ResultT...> Identities,
1381
1391
std::index_sequence<Is...>) {
1382
1392
// Normally, the local accessors are initialized with elements from the input
1383
1393
// accessors. The exception is the case when (GID >= NWorkItems), which
@@ -1402,8 +1412,8 @@ void initReduLocalAccs(size_t LID, size_t GID, size_t NWorkItems, size_t WGSize,
1402
1412
1403
1413
template <typename ... LocalAccT, typename ... BOPsT, size_t ... Is>
1404
1414
void reduceReduLocalAccs (size_t IndexA, size_t IndexB,
1405
- std::tuple <LocalAccT...> LocalAccs,
1406
- std::tuple <BOPsT...> BOPs,
1415
+ ReduTupleT <LocalAccT...> LocalAccs,
1416
+ ReduTupleT <BOPsT...> BOPs,
1407
1417
std::index_sequence<Is...>) {
1408
1418
std::tie (std::get<Is>(LocalAccs)[IndexA]...) =
1409
1419
std::make_tuple ((std::get<Is>(BOPs)(std::get<Is>(LocalAccs)[IndexA],
@@ -1415,8 +1425,8 @@ template <bool Pow2WG, bool IsOneWG, typename... Reductions,
1415
1425
typename ... Ts, size_t ... Is>
1416
1426
void writeReduSumsToOutAccs (
1417
1427
size_t OutAccIndex, size_t WGSize, std::tuple<Reductions...> *,
1418
- std::tuple <OutAccT...> OutAccs, std::tuple <LocalAccT...> LocalAccs,
1419
- std::tuple <BOPsT...> BOPs, std::tuple <Ts...> IdentityVals,
1428
+ ReduTupleT <OutAccT...> OutAccs, ReduTupleT <LocalAccT...> LocalAccs,
1429
+ ReduTupleT <BOPsT...> BOPs, ReduTupleT <Ts...> IdentityVals,
1420
1430
std::array<bool , sizeof ...(Reductions)> IsInitializeToIdentity,
1421
1431
std::index_sequence<Is...>) {
1422
1432
// Add the initial value of user's variable to the final result.
@@ -1528,9 +1538,9 @@ void reduCGFuncImpl(handler &CGH, KernelType KernelFunc,
1528
1538
auto OutAccsTuple =
1529
1539
createReduOutAccs<IsOneWG>(NWorkGroups, CGH, ReduTuple, ReduIndices);
1530
1540
auto IdentitiesTuple = getReduIdentities (ReduTuple, ReduIndices);
1541
+ auto BOPsTuple = getReduBOPs (ReduTuple, ReduIndices);
1531
1542
auto InitToIdentityProps =
1532
1543
getInitToIdentityProperties (ReduTuple, ReduIndices);
1533
- auto BOPsTuple = getReduBOPs (ReduTuple, ReduIndices);
1534
1544
1535
1545
using Name = typename get_reduction_main_kernel_name_t <
1536
1546
KernelName, KernelType, Pow2WG, IsOneWG, decltype (OutAccsTuple)>::name;
0 commit comments