[ESIMD] Support bfloat16 simd vector element type.

kbobrovs · kbobrovs · commit 1cb5cb0c50b4 · 2022-08-31T15:33:51.000-07:00
- Implement corresponding element type traits
- Implement __esimd_bitcast and __esimd_bf_cvt intrinsics
diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
@@ -654,7 +654,7 @@ class ESIMDIntrinDescTable {
         {"test_src_tmpl_arg",
          {"test.src.tmpl.arg", {t(0), t1(1), t8(2), t16(3), t32(4), c8(17)}}},
         {"slm_init", {"slm.init", {a(0)}}},
-    };
+        {"bf_cvt", {"bf.cvt", {a(0)}}}};
   }
 
   const IntrinTable &getTable() { return Table; }
@@ -1106,6 +1106,15 @@ static void translateGetSurfaceIndex(CallInst &CI) {
   CI.replaceAllUsesWith(SI);
 }
 
+static void translateBitcast(CallInst &CI) {
+  // Lower the call to a bitcast of its argument. CreateBitCast may return the
+  // argument itself (same type) or a folded constant, so no cast<CastInst>;
+  // a newly created cast inherits CI's debug location from the builder.
+  IRBuilder<> Builder(&CI);
+  Value *BC = Builder.CreateBitCast(CI.getArgOperand(0), CI.getType());
+  CI.replaceAllUsesWith(BC);
+}
+
 // Newly created GenX intrinsic might have different return type than expected.
 // This helper function creates cast operation from GenX intrinsic return type
 // to currently expected. Returns pointer to created cast instruction if it
@@ -1766,6 +1775,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
         ToErase.push_back(CI);
         continue;
       }
+      if (Name.startswith("__esimd_bitcast")) {
+        translateBitcast(*CI);
+        ToErase.push_back(CI);
+        continue;
+      }
       assert(!Name.startswith("__esimd_set_kernel_properties") &&
              "__esimd_set_kernel_properties must have been lowered");
 
diff --git a/sycl/include/sycl/ext/intel/esimd.hpp b/sycl/include/sycl/ext/intel/esimd.hpp
@@ -81,6 +81,7 @@
 
 #include <sycl/ext/intel/esimd/alt_ui.hpp>
 #include <sycl/ext/intel/esimd/common.hpp>
+#include <sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp>
 #include <sycl/ext/intel/esimd/detail/half_type_traits.hpp>
 #include <sycl/ext/intel/esimd/simd.hpp>
 #include <sycl/ext/intel/esimd/simd_view.hpp>
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp b/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp
@@ -0,0 +1,108 @@
+//==-------------- bfloat16_type_traits.hpp - DPC++ Explicit SIMD API ------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Implementation of SIMD element type traits for the bfloat16 type.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <sycl/ext/intel/esimd/detail/elem_type_traits.hpp>
+#include <sycl/ext/intel/esimd/detail/intrin.hpp>
+
+#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
+
+/// @cond ESIMD_DETAIL
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+namespace ext::intel::esimd::detail {
+
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+
+template <> struct element_type_traits<bfloat16> {
+  // TODO map the raw type to __bf16 once SPIRV target supports it:
+  using RawT =
+      typename std::invoke_result_t<decltype(&bfloat16::raw), bfloat16>;
+  // Nearest standard enclosing C++ type to delegate natively unsupported
+  // operations to:
+  using EnclosingCppT = float;
+  // Can't map bfloat16 operations to operations on RawT:
+  static inline constexpr bool use_native_cpp_ops = false;
+  static inline constexpr bool is_floating_point = true;
+};
+
+#ifdef __SYCL_DEVICE_ONLY__
+// VC BE-specific glitch
+// @llvm.genx.bf.cvt uses half (_Float16) as bit representation for bfloat16
+using vc_be_bfloat16_raw_t = _Float16;
+#endif // __SYCL_DEVICE_ONLY__
+
+// ------------------- Type conversion traits
+
+template <int N> struct vector_conversion_traits<bfloat16, N> {
+  using StdT = __cpp_t<bfloat16>; // element type on the C++ side
+  using StdVecT = vector_type_t<StdT, N>;
+  using RawT = __raw_t<bfloat16>; // element type of the raw bit vectors
+
+  static ESIMD_INLINE vector_type_t<RawT, N>
+  convert_to_raw(vector_type_t<StdT, N> Val) {
+#ifdef __SYCL_DEVICE_ONLY__
+    using RawVecT = vector_type_t<vc_be_bfloat16_raw_t, N>;
+    RawVecT ConvVal = __esimd_bf_cvt<vc_be_bfloat16_raw_t, StdT, N>(Val);
+    // reinterpret the _Float16-typed result bits as a vector of RawT:
+    return __esimd_bitcast<vector_type_t<RawT, N>>(ConvVal);
+#else // host: no __esimd_bf_cvt, convert each element via the bfloat16 class
+    vector_type_t<RawT, N> Output = 0;
+
+    for (int i = 0; i < N; i++) {
+      Output[i] = bfloat16(Val[i]).raw();
+    }
+    return Output;
+#endif // __SYCL_DEVICE_ONLY__
+  }
+
+  static ESIMD_INLINE vector_type_t<StdT, N>
+  convert_to_cpp(vector_type_t<RawT, N> Val) {
+#ifdef __SYCL_DEVICE_ONLY__
+    using RawVecT = vector_type_t<vc_be_bfloat16_raw_t, N>;
+    RawVecT Bits = __esimd_bitcast<RawVecT>(Val); // same bits, _Float16-typed
+    return __esimd_bf_cvt<StdT, vc_be_bfloat16_raw_t, N>(Bits);
+#else // host: decode each element with bfloat16::from_bits
+    vector_type_t<StdT, N> Output;
+
+    for (int i = 0; i < N; i++) {
+      Output[i] = bfloat16::from_bits(Val[i]);
+    }
+    return Output;
+#endif // __SYCL_DEVICE_ONLY__
+  }
+};
+
+template <> struct scalar_conversion_traits<bfloat16> { // scalar <-> raw bits
+  using RawT = __raw_t<bfloat16>;
+
+  static ESIMD_INLINE RawT bitcast_to_raw(bfloat16 Val) { return Val.raw(); }
+
+  static ESIMD_INLINE bfloat16 bitcast_to_wrapper(RawT Val) {
+    return bfloat16::from_bits(Val);
+  }
+};
+
+// bfloat16 uses default inefficient implementations of std C++ operations,
+// hence no specializations of other traits.
+
+// Misc: stream insertion prints the bfloat16 value converted to float.
+inline std::ostream &operator<<(std::ostream &O, bfloat16 const &rhs) {
+  O << static_cast<float>(rhs);
+  return O;
+}
+
+} // namespace ext::intel::esimd::detail
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
+
+/// @endcond ESIMD_DETAIL
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/half_type_traits.hpp b/sycl/include/sycl/ext/intel/esimd/detail/half_type_traits.hpp
@@ -96,10 +96,7 @@ template <int N> struct vector_conversion_traits<sycl::half, N> {
 };
 
 // Proxy class to access bit representation of a wrapper type both on host and
-// device. Declared as friend to the wrapper types (e.g. sycl::half).
-// Specific type traits implementations (scalar_conversion_traits) can use
-// concrete wrapper type specializations of the static functions in this class
-// to access private fields in the wrapper type (e.g. sycl::half).
+// device. Declared as a friend of sycl::half.
 // TODO add this functionality to sycl type implementation? With C++20,
 // std::bit_cast should be a good replacement.
 class WrapperElementTypeProxy {
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/intrin.hpp
@@ -335,7 +335,35 @@ __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
   }
   return Result;
 }
+#endif // __SYCL_DEVICE_ONLY__
 
+// Reinterprets the bits of Src as a value of type To; the template only
+// participates when both types have the same size. For device compilation the
+// function is declared but not defined here. On host the object
+// representation is copied byte by byte through unsigned char, which may
+// alias any object, instead of reading Src through a To pointer (a
+// strict-aliasing violation). The host path needs To to be
+// default-constructible.
+// TODO should be replaced by std::bit_cast once C++20 is supported.
+template <class To, class From,
+          class = std::enable_if_t<sizeof(From) == sizeof(To)>>
+__ESIMD_INTRIN To __esimd_bitcast(From Src)
+#ifdef __SYCL_DEVICE_ONLY__
+    ;
+#else
+{
+  To Dst;
+  auto *Out = reinterpret_cast<unsigned char *>(&Dst);
+  const auto *In = reinterpret_cast<const unsigned char *>(&Src);
+  for (unsigned I = 0; I < sizeof(To); ++I)
+    Out[I] = In[I];
+  return Dst;
+}
+#endif // __SYCL_DEVICE_ONLY__
+
+#ifdef __SYCL_DEVICE_ONLY__
+// This intrinsic requires one of the types to be _Float16, which is absent on
+// host, so it can't be represented on host. Callers must emulate it.
+template <class To, class From, int N>
+__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
+__esimd_bf_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
 #endif // __SYCL_DEVICE_ONLY__
 
 /// @endcond ESIMD_DETAIL