@@ -760,17 +760,20 @@ class __SYCL_EXPORT handler {
760
760
using NameT =
761
761
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
762
762
763
+ // The work group size preferred by this device.
763
764
// A reasonable choice for rounding up the range is 32.
764
765
constexpr size_t GoodLocalSizeX = 32 ;
765
766
766
767
// Disable the rounding-up optimizations under these conditions:
767
- // 1. The env var SYCL_OPT_PFWGS_DISABLE is set
768
- // 2. When the string SYCL_OPT_PFWGS_DISABLE is in the kernel name.
769
- // 3. The kernel is provided via an interoperability method.
770
- // 4. The API "this_item" is used inside the kernel.
771
- // 5. The range is already a multiple of the rounding factor.
768
+ // 1. The device is not a GPU. Only GPUs benefit from rounding.
769
+ // 2. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
770
+ // 3. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in
771
+ // the kernel name.
772
+ // 4. The kernel is provided via an interoperability method.
773
+ // 5. The API "this_item" is used inside the kernel.
774
+ // 6. The range is already a multiple of the rounding factor.
772
775
//
773
- // Cases 3 and 4 could be supported with extra effort.
776
+ // Cases 4 and 5 could be supported with extra effort.
774
777
// As an optimization for the common case, it is an
775
778
// implementation choice not to support those scenarios.
776
779
// Note that "this_item" is a free function, i.e. not tied to any
@@ -784,6 +787,7 @@ class __SYCL_EXPORT handler {
784
787
std::string KName = typeid (NameT *).name ();
785
788
using KI = detail::KernelInfo<KernelName>;
786
789
bool DisableRounding =
790
+ !is_gpu (MQueue) ||
787
791
(getenv (" SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" ) != nullptr ) ||
788
792
(KName.find (" SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" ) !=
789
793
std::string::npos) ||
@@ -1949,6 +1953,12 @@ class __SYCL_EXPORT handler {
1949
1953
// / \param Count is a number of bytes to be prefetched.
1950
1954
void prefetch (const void *Ptr, size_t Count);
1951
1955
1956
+ // / Check whether the queue being used is for a GPU device.
1957
+ // /
1958
+ // / \param Queue is the queue for this handler.
1959
+ // / \return true if the device is a GPU, false otherwise.
1960
+ bool is_gpu (shared_ptr_class<sycl::detail::queue_impl> Queue);
1961
+
1952
1962
private:
1953
1963
shared_ptr_class<detail::queue_impl> MQueue;
1954
1964
// / The storage for the arguments passed.
0 commit comments