intel · sarnex · Aug 26, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
@@ -98,9 +98,12 @@ template <typename TRes, typename TArg, int SZ>
 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
   simd<TArg, SZ> Result;
-  if constexpr (detail::is_generic_floating_point_v<TArg>)
-    Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
-  else
+  if constexpr (detail::is_generic_floating_point_v<TArg>) {
+    using CppT = __ESIMD_DNS::element_type_traits<TArg>::EnclosingCppT;
+    Result =
+        __ESIMD_DNS::convert_vector<TArg, CppT, SZ>(__spirv_ocl_fabs<CppT, SZ>(
+            __ESIMD_DNS::convert_vector<CppT, TArg, SZ>(src0.data())));
+  } else
     Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
   return convert<TRes>(Result);
 }
@@ -184,8 +187,12 @@ template <typename T, int SZ, class Sat = saturation_off_tag>
 __ESIMD_API simd<T, SZ>(max)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
   constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
 
-  if constexpr (std::is_floating_point<T>::value) {
-    auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
+  if constexpr (detail::is_generic_floating_point_v<T>) {
+    using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
+    auto Result =
+        __ESIMD_DNS::convert_vector<T, CppT, SZ>(__spirv_ocl_fmax<CppT, SZ>(
+            __ESIMD_DNS::convert_vector<CppT, T, SZ>(src0.data()),
+            __ESIMD_DNS::convert_vector<CppT, T, SZ>(src1.data())));
     if constexpr (is_sat)
       Result = __esimd_sat<T, T, SZ>(Result);
     return simd<T, SZ>(Result);
@@ -269,8 +276,12 @@ template <typename T, int SZ, class Sat = saturation_off_tag>
 __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
   constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
 
-  if constexpr (std::is_floating_point<T>::value) {
-    auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
+  if constexpr (detail::is_generic_floating_point_v<T>) {
+    using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
+    auto Result =
+        __ESIMD_DNS::convert_vector<T, CppT, SZ>(__spirv_ocl_fmin<CppT, SZ>(
+            __ESIMD_DNS::convert_vector<CppT, T, SZ>(src0.data()),
+            __ESIMD_DNS::convert_vector<CppT, T, SZ>(src1.data())));
     if constexpr (is_sat)
       Result = __esimd_sat<T, T, SZ>(Result);
     return simd<T, SZ>(Result);
@@ -1465,8 +1476,12 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
   template <typename... T>
   simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
-    if constexpr (std::is_floating_point<T1>::value) {
-      return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
+    if constexpr (detail::is_generic_floating_point_v<T1>) {
+      using CppT = __ESIMD_DNS::element_type_traits<T1>::EnclosingCppT;
+      return __ESIMD_DNS::convert_vector<T1, CppT, SZ>(
+          __spirv_ocl_fmax<CppT, SZ>(
+              __ESIMD_DNS::convert_vector<CppT, T1, SZ>(v1.data()),
+              __ESIMD_DNS::convert_vector<CppT, T1, SZ>(v2.data())));
     } else if constexpr (std::is_unsigned<T1>::value) {
       return __esimd_umax<T1, SZ>(v1.data(), v2.data());
     } else {
@@ -1478,8 +1493,13 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
   template <typename... T>
   simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
-    if constexpr (std::is_floating_point<T1>::value) {
-      return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
+
+    if constexpr (detail::is_generic_floating_point_v<T1>) {
+      using CppT = __ESIMD_DNS::element_type_traits<T1>::EnclosingCppT;
+      return __ESIMD_DNS::convert_vector<T1, CppT, SZ>(
+          __spirv_ocl_fmin<CppT, SZ>(
+              __ESIMD_DNS::convert_vector<CppT, T1, SZ>(v1.data()),
+              __ESIMD_DNS::convert_vector<CppT, T1, SZ>(v2.data())));
     } else if constexpr (std::is_unsigned<T1>::value) {
       return __esimd_umin<T1, SZ>(v1.data(), v2.data());
     } else {

@@ -0,0 +1,92 @@
+//==- spirv_fp_test.cpp - Test for abs function -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: arch-intel_gpu_pvc
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/detail/core.hpp>
+#include <sycl/ext/intel/esimd.hpp>
+
+#include <sycl/usm.hpp>
+#include <sycl/usm/usm_allocator.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::intel::esimd;
+using bf16 = sycl::ext::oneapi::bfloat16;
+using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
+
+template <typename DataT>
+using shared_allocator = sycl::usm_allocator<DataT, sycl::usm::alloc::shared>;
+template <typename DataT>
+using shared_vector = std::vector<DataT, shared_allocator<DataT>>;
+
+template <typename T, int N>
+bool test(sycl::queue &Queue, T testValue1, T testValue2) {
+  shared_allocator<T> Allocator(Queue);
+
+  shared_vector<T> OutputAbs(N, 0, Allocator);
+  shared_vector<T> OutputMin(N, 0, Allocator);
+  shared_vector<T> OutputMax(N, 0, Allocator);
+
+  auto *OutputAbsPtr = OutputAbs.data();
+  auto *OutputMinPtr = OutputMin.data();
+  auto *OutputMaxPtr = OutputMax.data();
+
+  Queue.submit([&](sycl::handler &cgh) {
+    auto Kernel = ([=]() SYCL_ESIMD_KERNEL {
+      simd<T, N> Input1 = testValue1;
+      simd<T, N> Input2 = testValue2;
+      simd<T, N> ResultAbs = __ESIMD_NS::abs(Input1);
+      simd<T, N> ResultMin = __ESIMD_NS::min(Input1, Input2);
+      simd<T, N> ResultMax = __ESIMD_NS::max(Input1, Input2);
+      ResultAbs.copy_to(OutputAbsPtr);
+      ResultMin.copy_to(OutputMinPtr);
+      ResultMax.copy_to(OutputMaxPtr);
+    });
+    cgh.single_task(Kernel);
+  });
+  Queue.wait();
+
+  for (int I = 0; I < N; I++) {
+    if (std::abs(testValue1) != OutputAbs[I]) {
+      std::cout << "Incorrect value for abs at index " << I << " "
+                << std::abs(testValue1) << " != " << OutputAbs[I] << std::endl;
+      return false;
+    }
+    if (std::min(testValue1, testValue2) != OutputMin[I]) {
+      std::cout << "Incorrect value for min at index " << I << " "
+                << std::min(testValue1, testValue2) << " != " << OutputMin[I]
+                << std::endl;
+      return false;
+    }
+
+    if (std::max(testValue1, testValue2) != OutputMax[I]) {
+      std::cout << "Incorrect value for max at index " << I << " "
+                << std::max(testValue1, testValue2) << " != " << OutputMax[I]
+                << std::endl;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+int main() {
+
+  bool Pass = true;
+  sycl::queue Q;
+  Pass &= test<bf16, 8>(Q, -1, -2);
+  Pass &= test<tfloat32, 8>(Q, -1, -2);
+
+  if (Pass)
+    std::cout << "Pass" << std::endl;
+  else
+    std::cout << "Fail" << std::endl;
+
+  return !Pass;
+}