[SYCL] Add sub-group functions emulation for vector of doubles. (#8252)

maksimsab · web-flow · commit 84fe658f0d7f · 2023-02-21T20:26:17.000-08:00
intel/llvm-test-suite#1603
diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp
@@ -498,22 +498,44 @@ AtomicMax(multi_ptr<T, AddressSpace, IsDecorated> MPtr, memory_scope Scope,
 }
 
 // Native shuffles map directly to a shuffle intrinsic:
-// - The Intel SPIR-V extension natively supports all arithmetic types
+// - The Intel SPIR-V extension natively supports all arithmetic types.
+//   However, the OpenCL extension natively supports float vectors,
+//   integer vectors, half scalars and double scalars.
+//   For double vectors we emulate the shuffle with the scalar version.
 // - The CUDA shfl intrinsics do not support vectors, and we use the _i32
 //   variants for all scalar types
 #ifndef __NVPTX__
+
+template <typename T> // value is true iff vector_element_t<T> is double
+struct TypeIsProhibitedForShuffleEmulation
+    : bool_constant<std::is_same_v<vector_element_t<T>, double>> {};
+
+template <typename T> // true only for multi-element T with double elements
+struct VecTypeIsProhibitedForShuffleEmulation
+    : bool_constant<
+          (detail::get_vec_size<T>::size > 1) &&
+          TypeIsProhibitedForShuffleEmulation<vector_element_t<T>>::value> {};
+
 template <typename T>
 using EnableIfNativeShuffle =
-    detail::enable_if_t<detail::is_arithmetic<T>::value, T>;
-#else
+    std::enable_if_t<detail::is_arithmetic<T>::value &&
+                         !VecTypeIsProhibitedForShuffleEmulation<T>::value,
+                     T>; // arithmetic T other than a double vector
+
 template <typename T>
-using EnableIfNativeShuffle = detail::enable_if_t<
+using EnableIfVectorShuffle = // enabled only when T is a double vector
+    std::enable_if_t<VecTypeIsProhibitedForShuffleEmulation<T>::value, T>;
+
+#else  // ifndef __NVPTX__
+
+template <typename T> // NVPTX: integral types no wider than int32_t
+using EnableIfNativeShuffle = std::enable_if_t<
     std::is_integral<T>::value && (sizeof(T) <= sizeof(int32_t)), T>;
 
 template <typename T>
 using EnableIfVectorShuffle =
-    detail::enable_if_t<detail::is_vector_arithmetic<T>::value, T>;
-#endif
+    std::enable_if_t<detail::is_vector_arithmetic<T>::value, T>; // NVPTX
+#endif // ifndef __NVPTX__
 
 #ifdef __NVPTX__
 inline uint32_t membermask() {
@@ -565,7 +587,6 @@ EnableIfNativeShuffle<T> SubgroupShuffleUp(T x, uint32_t delta) {
 #endif
 }
 
-#ifdef __NVPTX__
 template <typename T>
 EnableIfVectorShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
   T result;
@@ -601,7 +622,6 @@ EnableIfVectorShuffle<T> SubgroupShuffleUp(T x, uint32_t delta) {
   }
   return result;
 }
-#endif
 
 // Bitcast shuffles can be implemented using a single SubgroupShuffle
 // intrinsic, but require type-punning via an appropriate integer type
diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp
@@ -231,6 +231,14 @@ template <typename> struct is_vec : std::false_type {};
 template <typename T, std::size_t N>
 struct is_vec<sycl::vec<T, N>> : std::true_type {};
 
+template <typename> struct get_vec_size {
+  static constexpr std::size_t size = 1; // any type that is not a sycl::vec
+};
+
+template <typename T, std::size_t N> struct get_vec_size<sycl::vec<T, N>> {
+  static constexpr std::size_t size = N; // element count of sycl::vec<T, N>
+};
+
 // is_integral
 template <typename T>
 struct is_integral : std::is_integral<vector_element_t<T>> {};