@@ -140,17 +140,10 @@ template <typename... Ts> ReduTupleT<Ts...> makeReduTupleT(Ts... Elements) {
140
140
return sycl::detail::make_tuple (Elements...);
141
141
}
142
142
143
- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
144
- __SYCL_EXPORT size_t reduGetMaxWGSize (const std::shared_ptr<queue_impl> &Queue,
145
- size_t LocalMemBytesPerWorkItem);
146
- __SYCL_EXPORT size_t reduGetPreferredWGSize (
147
- const std::shared_ptr<queue_impl> &Queue, size_t LocalMemBytesPerWorkItem);
148
- #else
149
- __SYCL_EXPORT size_t reduGetMaxWGSize (std::shared_ptr<queue_impl> Queue,
143
+ __SYCL_EXPORT size_t reduGetMaxWGSize (handler &cgh,
150
144
size_t LocalMemBytesPerWorkItem);
151
- __SYCL_EXPORT size_t reduGetPreferredWGSize (std::shared_ptr<queue_impl> &Queue ,
145
+ __SYCL_EXPORT size_t reduGetPreferredWGSize (handler &cgh ,
152
146
size_t LocalMemBytesPerWorkItem);
153
- #endif
154
147
__SYCL_EXPORT size_t reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize,
155
148
size_t &NWorkGroups);
156
149
@@ -1708,8 +1701,7 @@ struct NDRangeReduction<
1708
1701
reduction::strategy::group_reduce_and_multiple_kernels> {
1709
1702
template <typename KernelName, int Dims, typename PropertiesT,
1710
1703
typename KernelType, typename Reduction>
1711
- static void run (handler &CGH,
1712
- const std::shared_ptr<detail::queue_impl> &Queue,
1704
+ static void run (handler &CGH, const std::shared_ptr<detail::queue_impl> &,
1713
1705
nd_range<Dims> NDRange, PropertiesT &Properties,
1714
1706
Reduction &Redu, KernelType &KernelFunc) {
1715
1707
static_assert (Reduction::has_identity,
@@ -1729,7 +1721,7 @@ struct NDRangeReduction<
1729
1721
// TODO: currently the maximal work group size is determined for the given
1730
1722
// queue/device, while it may be safer to use queries to the kernel compiled
1731
1723
// for the device.
1732
- size_t MaxWGSize = reduGetMaxWGSize (Queue , OneElemSize);
1724
+ size_t MaxWGSize = reduGetMaxWGSize (CGH , OneElemSize);
1733
1725
if (NDRange.get_local_range ().size () > MaxWGSize)
1734
1726
throw sycl::exception (make_error_code (errc::nd_range),
1735
1727
" The implementation handling parallel_for with"
@@ -1847,8 +1839,7 @@ struct NDRangeReduction<
1847
1839
template <> struct NDRangeReduction <reduction::strategy::basic> {
1848
1840
template <typename KernelName, int Dims, typename PropertiesT,
1849
1841
typename KernelType, typename Reduction>
1850
- static void run (handler &CGH,
1851
- const std::shared_ptr<detail::queue_impl> &Queue,
1842
+ static void run (handler &CGH, const std::shared_ptr<detail::queue_impl> &,
1852
1843
nd_range<Dims> NDRange, PropertiesT &Properties,
1853
1844
Reduction &Redu, KernelType &KernelFunc) {
1854
1845
using element_type = typename Reduction::reducer_element_type;
@@ -1858,7 +1849,7 @@ template <> struct NDRangeReduction<reduction::strategy::basic> {
1858
1849
// TODO: currently the maximal work group size is determined for the given
1859
1850
// queue/device, while it may be safer to use queries to the kernel
1860
1851
// compiled for the device.
1861
- size_t MaxWGSize = reduGetMaxWGSize (Queue , OneElemSize);
1852
+ size_t MaxWGSize = reduGetMaxWGSize (CGH , OneElemSize);
1862
1853
if (NDRange.get_local_range ().size () > MaxWGSize)
1863
1854
throw sycl::exception (make_error_code (errc::nd_range),
1864
1855
" The implementation handling parallel_for with"
@@ -2623,9 +2614,9 @@ tuple_select_elements(TupleT Tuple, std::index_sequence<Is...>) {
2623
2614
template <> struct NDRangeReduction <reduction::strategy::multi> {
2624
2615
template <typename KernelName, int Dims, typename PropertiesT,
2625
2616
typename ... RestT>
2626
- static void
2627
- run (handler &CGH, const std::shared_ptr<detail::queue_impl> &Queue ,
2628
- nd_range<Dims> NDRange, PropertiesT &Properties, RestT... Rest) {
2617
+ static void run (handler &CGH, const std::shared_ptr<detail::queue_impl> &,
2618
+ nd_range<Dims> NDRange, PropertiesT &Properties ,
2619
+ RestT... Rest) {
2629
2620
std::tuple<RestT...> ArgsTuple (Rest...);
2630
2621
constexpr size_t NumArgs = sizeof ...(RestT);
2631
2622
auto KernelFunc = std::get<NumArgs - 1 >(ArgsTuple);
@@ -2636,7 +2627,7 @@ template <> struct NDRangeReduction<reduction::strategy::multi> {
2636
2627
// TODO: currently the maximal work group size is determined for the given
2637
2628
// queue/device, while it is safer to use queries to the kernel compiled
2638
2629
// for the device.
2639
- size_t MaxWGSize = reduGetMaxWGSize (Queue , LocalMemPerWorkItem);
2630
+ size_t MaxWGSize = reduGetMaxWGSize (CGH , LocalMemPerWorkItem);
2640
2631
if (NDRange.get_local_range ().size () > MaxWGSize)
2641
2632
throw sycl::exception (make_error_code (errc::nd_range),
2642
2633
" The implementation handling parallel_for with"
@@ -2731,8 +2722,7 @@ void reduction_parallel_for(handler &CGH, nd_range<Dims> NDRange,
2731
2722
Properties, Rest...);
2732
2723
}
2733
2724
2734
- __SYCL_EXPORT uint32_t
2735
- reduGetMaxNumConcurrentWorkGroups (std::shared_ptr<queue_impl> Queue);
2725
+ __SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups (handler &cgh);
2736
2726
2737
2727
template <typename KernelName, reduction::strategy Strategy, int Dims,
2738
2728
typename PropertiesT, typename ... RestT>
@@ -2763,13 +2753,13 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
2763
2753
#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS
2764
2754
__SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS;
2765
2755
#else
2766
- reduGetMaxNumConcurrentWorkGroups (CGH. MQueue );
2756
+ reduGetMaxNumConcurrentWorkGroups (CGH);
2767
2757
#endif
2768
2758
2769
2759
// TODO: currently the preferred work group size is determined for the given
2770
2760
// queue/device, while it is safer to use queries to the kernel pre-compiled
2771
2761
// for the device.
2772
- size_t PrefWGSize = reduGetPreferredWGSize (CGH. MQueue , OneElemSize);
2762
+ size_t PrefWGSize = reduGetPreferredWGSize (CGH, OneElemSize);
2773
2763
2774
2764
size_t NWorkItems = Range.size ();
2775
2765
size_t WGSize = std::min (NWorkItems, PrefWGSize);
0 commit comments