Address review comments

aelovikov-intel · aelovikov-intel · commit c74a69d772b3 · 2022-07-05T16:25:47.000-07:00
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
@@ -462,7 +462,7 @@ class __SYCL_EXPORT handler {
     MStreamStorage.push_back(Stream);
   }
 
-  /// Helper utility for operation widely used throught different reduction
+  /// Helper utility for operations widely used throughout different reduction
   /// implementations.
   /// @{
   template <class FunctorTy>
@@ -1654,7 +1654,7 @@ class __SYCL_EXPORT handler {
     if constexpr (!Reduction::has_fast_atomics &&
                   !Reduction::has_atomic_add_float64) {
       // The most basic implementation.
-      parallel_for_Impl<KernelName>(Range, Redu, KernelFunc);
+      parallel_for_impl<KernelName>(Range, Redu, KernelFunc);
       return;
     } else { // Can't "early" return for "if constexpr".
       std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;
@@ -1672,7 +1672,7 @@ class __SYCL_EXPORT handler {
                                                               Range, Redu);
         } else {
           // Resort to basic implementation as well.
-          parallel_for_Impl<KernelName>(Range, Redu, KernelFunc);
+          parallel_for_impl<KernelName>(Range, Redu, KernelFunc);
           return;
         }
       } else {
@@ -1702,7 +1702,7 @@ class __SYCL_EXPORT handler {
 
   template <typename KernelName, typename KernelType, int Dims,
             typename Reduction>
-  void parallel_for_Impl(nd_range<Dims> Range, Reduction Redu,
+  void parallel_for_impl(nd_range<Dims> Range, Reduction Redu,
                          KernelType KernelFunc) {
     // This parallel_for() is lowered to the following sequence:
     // 1) Call a kernel that a) call user's lambda function and b) performs
diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp
@@ -2160,10 +2160,6 @@ void reduCGFuncImplAtomic64(handler &CGH, KernelType KernelFunc,
 template <typename KernelName, typename KernelType, int Dims, class Reduction>
 void reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
                         const nd_range<Dims> &Range, Reduction &Redu) {
-  // static_assert(
-  //     Reduction::has_atomic_add_float64,
-  //     "Expected to be called for reductions with atomic add FP64 support!");
-
   auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH);
   reduCGFuncImplAtomic64<KernelName, KernelType, Dims, Reduction>(
       CGH, KernelFunc, Range, Redu, Out);
@@ -2452,7 +2448,7 @@ void reduSaveFinalResultToUserMemHelper(
     bool IsHost, Reduction &Redu, RestT... Rest) {
   // Reductions initialized with USM pointer currently do not require copying
   // because the last kernel writes directly to the USM memory.
-  if constexpr (!Reduction::is_usm)
+  if constexpr (!Reduction::is_usm) {
     if (Redu.hasUserDiscardWriteAccessor()) {
       event CopyEvent =
           handler::withAuxHandler(Queue, IsHost, [&](handler &CopyHandler) {
@@ -2465,6 +2461,7 @@ void reduSaveFinalResultToUserMemHelper(
           });
       Events.push_back(CopyEvent);
     }
+  }
   reduSaveFinalResultToUserMemHelper(Events, Queue, IsHost, Rest...);
 }