Reductions now set max_wg to the minimum of the max work group size and 2048

ndgrigorian · ndgrigorian · commit 2ea75e0f3ae6 · 2023-09-14T15:14:25.000-07:00
- This prevents reduction kernels from running out of resources when using local memory on CPU devices
diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
@@ -977,7 +977,10 @@ sycl::event reduction_over_group_temps_strided_impl(
     size_t wg = choose_workgroup_size<4>(reduction_nelems, sg_sizes);
 
     constexpr size_t preferrered_reductions_per_wi = 4;
-    size_t max_wg = d.get_info<sycl::info::device::max_work_group_size>();
+    // cap max_wg at max_max_wg to avoid running out of resources on CPU
+    constexpr size_t max_max_wg = 2048;
+    size_t max_wg = std::min(
+        max_max_wg, d.get_info<sycl::info::device::max_work_group_size>());
 
     size_t reductions_per_wi(preferrered_reductions_per_wi);
     if (reduction_nelems <= preferrered_reductions_per_wi * max_wg) {