[SYCL][ESIMD] Add support for tf32 (#6828)

fineg74 · web-flow · commit 78eb0883608c · 2022-09-22T15:35:45.000-07:00
diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp
@@ -51,7 +51,8 @@ static const char *LegalSYCLFunctions[] = {
     "^sycl::_V1::ext::oneapi::sub_group::.+",
     "^sycl::_V1::ext::oneapi::experimental::spec_constant<.+>::.+",
     "^sycl::_V1::ext::oneapi::experimental::this_sub_group",
-    "^sycl::_V1::ext::oneapi::experimental::bfloat16::.+"};
+    "^sycl::_V1::ext::oneapi::experimental::bfloat16::.+",
+    "^sycl::_V1::ext::oneapi::experimental::tfloat32::.+"};
 
 static const char *LegalSYCLFunctionsInStatelessMode[] = {
     "^sycl::_V1::multi_ptr<.+>::get",
diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
@@ -654,7 +654,8 @@ class ESIMDIntrinDescTable {
         {"test_src_tmpl_arg",
          {"test.src.tmpl.arg", {t(0), t1(1), t8(2), t16(3), t32(4), c8(17)}}},
         {"slm_init", {"slm.init", {a(0)}}},
-        {"bf_cvt", {"bf.cvt", {a(0)}}}};
+        {"bf_cvt", {"bf.cvt", {a(0)}}},
+        {"tf32_cvt", {"tf32.cvt", {a(0)}}}};
   }
 
   const IntrinTable &getTable() { return Table; }
diff --git a/sycl/include/sycl/ext/intel/esimd.hpp b/sycl/include/sycl/ext/intel/esimd.hpp
@@ -83,6 +83,7 @@
 #include <sycl/ext/intel/esimd/common.hpp>
 #include <sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp>
 #include <sycl/ext/intel/esimd/detail/half_type_traits.hpp>
+#include <sycl/ext/intel/esimd/detail/tfloat32_type_traits.hpp>
 #include <sycl/ext/intel/esimd/simd.hpp>
 #include <sycl/ext/intel/esimd/simd_view.hpp>
 #include <sycl/ext/intel/esimd/xmx/dpas.hpp>
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/intrin.hpp
@@ -345,4 +345,9 @@ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
 __esimd_bf_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
 #endif // __SYCL_DEVICE_ONLY__
 
-/// @endcond ESIMD_DETAIL
+#ifdef __SYCL_DEVICE_ONLY__
+template <class To, class From, int N>
+__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
+__esimd_tf32_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
+#endif // __SYCL_DEVICE_ONLY__
+       /// @endcond ESIMD_DETAIL
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/tfloat32_type_traits.hpp b/sycl/include/sycl/ext/intel/esimd/detail/tfloat32_type_traits.hpp
@@ -0,0 +1,87 @@
+//==---------- tfloat32_type_traits.hpp - DPC++ Explicit SIMD API ----------==//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Implementation of SIMD element type traits for the tfloat32 type.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <sycl/ext/intel/esimd/detail/elem_type_traits.hpp>
+#include <sycl/ext/intel/esimd/detail/intrin.hpp>
+#include <sycl/ext/intel/experimental/esimd/tfloat32.hpp>
+
+/// @cond ESIMD_DETAIL
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+namespace ext::intel::esimd::detail {
+
+// Standalone definitions to use w/o instantiating element_type_traits.
+using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
+
+template <> struct element_type_traits<tfloat32> {
+  using RawT = unsigned int;   // presumably what __raw_t<tfloat32> resolves to
+  using EnclosingCppT = float; // presumably what __cpp_t<tfloat32> resolves to
+  // Trait flags; their consumers live outside this header (not visible here).
+  static inline constexpr bool use_native_cpp_ops = false;
+  static inline constexpr bool is_floating_point = true;
+};
+
+// ------------------- Type conversion traits
+
+// Whole-vector conversions between the StdT and RawT forms of tfloat32.
+template <int N> struct vector_conversion_traits<tfloat32, N> {
+  using StdT = __cpp_t<tfloat32>;
+  using RawT = __raw_t<tfloat32>;
+
+  // StdT -> RawT: one intrinsic call in device-only builds, else a lane loop.
+  static ESIMD_INLINE vector_type_t<RawT, N>
+  convert_to_raw(vector_type_t<StdT, N> Val) {
+#ifdef __SYCL_DEVICE_ONLY__
+    return __esimd_tf32_cvt<RawT, StdT, N>(Val);
+#else
+    vector_type_t<RawT, N> Raw = 0;
+    for (int Lane = 0; Lane < N; ++Lane) {
+      Raw[Lane] = sycl::bit_cast<RawT>(static_cast<tfloat32>(Val[Lane]));
+    }
+    return Raw;
+#endif
+  }
+
+  // RawT -> StdT: a single sycl::bit_cast of the whole vector.
+  static ESIMD_INLINE vector_type_t<StdT, N>
+  convert_to_cpp(vector_type_t<RawT, N> Val) {
+    return sycl::bit_cast<vector_type_t<StdT, N>>(Val);
+  }
+};
+
+template <> struct scalar_conversion_traits<tfloat32> {
+  using RawT = __raw_t<tfloat32>;
+  // Both helpers are plain sycl::bit_cast calls between tfloat32 and RawT.
+  static ESIMD_INLINE RawT bitcast_to_raw(tfloat32 Val) {
+    return sycl::bit_cast<RawT>(Val);
+  }
+
+  static ESIMD_INLINE tfloat32 bitcast_to_wrapper(RawT Val) {
+    return sycl::bit_cast<tfloat32>(Val);
+  }
+};
+
+// Misc
+// Streams a tfloat32 by writing its float conversion; returns the stream.
+inline std::ostream &operator<<(std::ostream &O, tfloat32 const &rhs) {
+  return O << static_cast<float>(rhs);
+}
+// Specializes is_esimd_arithmetic_type to std::true_type for tfloat32.
+template <> struct is_esimd_arithmetic_type<tfloat32, void> : std::true_type {};
+
+} // namespace ext::intel::esimd::detail
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
+
+/// @endcond ESIMD_DETAIL
diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp
@@ -357,8 +357,15 @@ ESIMD_INLINE
     __esimd_scatter_scaled<PromoT, N, decltype(si), TypeSizeLog2, scale>(
         mask.data(), si, glob_offset, offsets.data(), promo_vals.data());
   } else {
-    __esimd_scatter_scaled<T, N, decltype(si), TypeSizeLog2, scale>(
-        mask.data(), si, glob_offset, offsets.data(), vals.data());
+    using Treal = __raw_t<T>;
+    if constexpr (!std::is_same_v<Treal, T>) {
+      simd<Treal, N> Values = vals.template bit_cast_view<Treal>();
+      __esimd_scatter_scaled<Treal, N, decltype(si), TypeSizeLog2, scale>(
+          mask.data(), si, glob_offset, offsets.data(), Values.data());
+    } else {
+      __esimd_scatter_scaled<T, N, decltype(si), TypeSizeLog2, scale>(
+          mask.data(), si, glob_offset, offsets.data(), vals.data());
+    }
   }
 }
 
@@ -396,9 +403,15 @@ gather_impl(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
       return Res;
     }
   } else {
-    return __esimd_gather_masked_scaled2<T, N, decltype(si), TypeSizeLog2,
-                                         scale>(si, glob_offset, offsets.data(),
-                                                mask.data());
+    using Treal = __raw_t<T>;
+    simd<Treal, N> Res = __esimd_gather_masked_scaled2<Treal, N, decltype(si),
+                                                       TypeSizeLog2, scale>(
+        si, glob_offset, offsets.data(), mask.data());
+    if constexpr (!std::is_same_v<Treal, T>) {
+      return Res.template bit_cast_view<T>();
+    } else {
+      return Res;
+    }
   }
 }
 
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp
@@ -0,0 +1,69 @@
+//==--------- tfloat32.hpp ------- SYCL tensorfloat32 conversion ------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Implementation of SIMD tfloat32 type.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/__spirv/spirv_ops.hpp>
+#include <sycl/bit_cast.hpp>
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+namespace ext {
+namespace intel {
+namespace experimental {
+namespace esimd {
+
+// tfloat32 wraps a 32-bit pattern; float conversions clear the low 13 bits.
+class tfloat32 {
+  using storage_t = uint32_t;
+  storage_t value;
+
+public:
+  tfloat32() = default;
+  tfloat32(const tfloat32 &) = default;
+  ~tfloat32() = default;
+
+  // Explicit conversion functions; both mask the bits with 0xFFFFE000u.
+  static storage_t from_float(const float &a) {
+    return sycl::bit_cast<storage_t>(a) & 0xFFFFE000u;
+  }
+  static float to_float(const storage_t &a) {
+    return sycl::bit_cast<float>(a & 0xFFFFE000u);
+  }
+
+  // Implicit conversion from float to tfloat32
+  tfloat32(const float &a) : value(from_float(a)) {}
+  tfloat32 &operator=(const float &rhs) {
+    value = from_float(rhs);
+    return *this;
+  }
+
+  // Implicit conversion from tfloat32 to float
+  operator float() const { return to_float(value); }
+
+  // Get raw bits representation of tfloat32
+  storage_t raw() const { return value; }
+
+  // Logical operators (!,||,&&) are covered if we can cast to bool
+  explicit operator bool() const { return to_float(value) != 0.0f; }
+
+  // Unary minus: negate via the raw bits (lhs.value), not a numeric cast.
+  friend tfloat32 operator-(tfloat32 &lhs) {
+    return tfloat32(-to_float(lhs.value));
+  }
+};
+
+} // namespace esimd
+} // namespace experimental
+} // namespace intel
+} // namespace ext
+
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl

Original file line number	Diff line number	Diff line change
`@@ -654,7 +654,8 @@ class ESIMDIntrinDescTable {`
`654`	`654`	`{"test_src_tmpl_arg",`
`655`	`655`	`{"test.src.tmpl.arg", {t(0), t1(1), t8(2), t16(3), t32(4), c8(17)}}},`
`656`	`656`	`{"slm_init", {"slm.init", {a(0)}}},`
`657`		`- {"bf_cvt", {"bf.cvt", {a(0)}}}};`
	`657`	`+ {"bf_cvt", {"bf.cvt", {a(0)}}},`
	`658`	`+ {"tf32_cvt", {"tf32.cvt", {a(0)}}}};`
`658`	`659`	`}`
`659`	`660`
`660`	`661`	`const IntrinTable &getTable() { return Table; }`