intel · v-klochkov · Apr 4, 2023 · Feb 28, 2023 · Feb 28, 2023 · Mar 23, 2023
@@ -672,6 +672,10 @@ class ESIMDIntrinDescTable {
         {"slm_init", {"slm.init", {a(0)}}},
         {"bf_cvt", {"bf.cvt", {a(0)}}},
         {"tf32_cvt", {"tf32.cvt", {a(0)}}},
+        {"__devicelib_ConvertFToBF16INTEL",
+         {"__spirv_ConvertFToBF16INTEL", {a(0)}}},
+        {"__devicelib_ConvertBF16ToFINTEL",
+         {"__spirv_ConvertBF16ToFINTEL", {a(0)}}},
         {"addc", {"addc", {l(0)}}},
         {"subb", {"subb", {l(0)}}},
         {"bfn", {"bfn", {a(0), a(1), a(2), t(0)}}}};
@@ -703,6 +707,28 @@ static const ESIMDIntrinDesc &getIntrinDesc(StringRef SrcSpelling) {
   return It->second;
 }
 
+// Returns true when FunctionName is exactly one of the devicelib bfloat16
+// conversion entry points recognized by this pass; any other name yields
+// false.
+static bool isDevicelibFunction(StringRef FunctionName) {
+  if (FunctionName == "__devicelib_ConvertFToBF16INTEL")
+    return true;
+  return FunctionName == "__devicelib_ConvertBF16ToFINTEL";
+}
+
+// The devicelib functions are declared extern "C", which the demangler used
+// by this pass fails to handle properly. To let them pass through the regular
+// workflow, the name is wrapped as "_Z31" + name + a parameter suffix: "RKf"
+// for the float->bf16 conversion and "RKt" for the bf16->float conversion.
+// Any other name is returned unchanged.
+static std::string mangleDevicelibFunction(StringRef FunctionName) {
+  StringRef ParamSuffix =
+      llvm::StringSwitch<StringRef>(FunctionName)
+          .Case("__devicelib_ConvertFToBF16INTEL", "RKf")
+          .Case("__devicelib_ConvertBF16ToFINTEL", "RKt")
+          .Default("");
+
+  // Not a devicelib conversion function: nothing to mangle.
+  if (ParamSuffix.empty())
+    return FunctionName.str();
+
+  return (Twine("_Z31") + FunctionName + ParamSuffix).str();
+}
+
 Type *parsePrimitiveTypeString(StringRef TyStr, LLVMContext &Ctx) {
   return llvm::StringSwitch<Type *>(TyStr)
       .Case("bool", IntegerType::getInt1Ty(Ctx))
@@ -1326,6 +1352,46 @@ static void createESIMDIntrinsicArgs(const ESIMDIntrinDesc &Desc,
   }
 }
 
+// Create (or reuse) a spirv function declaration.
+// This is used for lowering devicelib functions.
+// The function
+// 1. Looks up the function named Desc.GenXSpelling in CI's module and, when
+//    it is absent, declares it with CI's return type and by-value arguments
+// 2. Converts each passed-by-reference argument of the devicelib function
+//    into a passed-by-value argument of the spirv function by emitting a load
+//    before CI; GenXArgs is updated in place to hold the loaded values
+// 3. Assigns attributes (nounwind, convergent, dso_local, spir_func calling
+//    convention) to a newly created declaration; a declaration that already
+//    exists in the module is returned untouched
+static Function *
+createDeviceLibESIMDDeclaration(const ESIMDIntrinDesc &Desc,
+                                SmallVector<Value *, 16> &GenXArgs,
+                                CallInst &CI) {
+  LLVMContext &Ctx = CI.getContext();
+
+  // The by-value argument type depends only on the target function, so it is
+  // resolved once instead of on every loop iteration.
+  Type *ArgTy =
+      llvm::StringSwitch<Type *>(Desc.GenXSpelling)
+          .Case("__spirv_ConvertFToBF16INTEL", Type::getFloatTy(Ctx))
+          .Case("__spirv_ConvertBF16ToFINTEL", Type::getInt16Ty(Ctx))
+          .Default(nullptr);
+  // A null type must never reach CreateLoad; diagnose an unexpected spelling
+  // here instead.
+  llvm::esimd::assert_and_diag(ArgTy != nullptr,
+                               "unsupported devicelib function");
+
+  SmallVector<Type *, 16> ArgTypes;
+  IRBuilder<> Bld(&CI);
+  for (Value *&Arg : GenXArgs) {
+    // Replace the pointer argument with the value it points to.
+    Arg = Bld.CreateLoad(ArgTy, Arg);
+    ArgTypes.push_back(ArgTy);
+  }
+
+  Module *M = CI.getModule();
+  Function *F = M->getFunction(Desc.GenXSpelling);
+  if (!F) {
+    auto *FType = FunctionType::get(CI.getType(), ArgTypes, false);
+    F = Function::Create(FType, GlobalVariable::ExternalLinkage,
+                         Desc.GenXSpelling, M);
+    F->addFnAttr(Attribute::NoUnwind);
+    F->addFnAttr(Attribute::Convergent);
+    F->setDSOLocal(true);
+
+    F->setCallingConv(CallingConv::SPIR_FUNC);
+  }
+
+  return F;
+}
+
 // Create a simple function declaration
 // This is used for testing purposes, when it is impossible to query
 // vc-intrinsics
@@ -1403,7 +1469,9 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
   using Demangler = id::ManglingParser<SimpleAllocator>;
   Function *F = CI.getCalledFunction();
   llvm::esimd::assert_and_diag(F, "function to translate is invalid");
-  StringRef MnglName = F->getName();
+  std::string MnglNameStr = mangleDevicelibFunction(F->getName());
+  StringRef MnglName = MnglNameStr;
+
   Demangler Parser(MnglName.begin(), MnglName.end());
   id::Node *AST = Parser.parse();
 
@@ -1416,7 +1484,9 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
   auto *FE = static_cast<id::FunctionEncoding *>(AST);
   id::StringView BaseNameV = FE->getName()->getBaseName();
 
-  auto PrefLen = StringRef(ESIMD_INTRIN_PREF1).size();
+  auto PrefLen = isDevicelibFunction(F->getName())
+                     ? 0
+                     : StringRef(ESIMD_INTRIN_PREF1).size();
   StringRef BaseName(BaseNameV.begin() + PrefLen, BaseNameV.size() - PrefLen);
   const auto &Desc = getIntrinDesc(BaseName);
   if (!Desc.isValid()) // TODO remove this once all intrinsics are supported
@@ -1429,7 +1499,9 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
   Function *NewFDecl = nullptr;
   bool DoesFunctionReturnStructure =
       isStructureReturningFunction(Desc.GenXSpelling);
-  if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
+  if (isDevicelibFunction(F->getName())) {
+    NewFDecl = createDeviceLibESIMDDeclaration(Desc, GenXArgs, CI);
+  } else if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
     // Special case for testing purposes
     NewFDecl = createTestESIMDDeclaration(Desc, GenXArgs, CI);
   } else {
@@ -1724,7 +1796,7 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
 
       // See if the Name represents an ESIMD intrinsic and demangle only if it
       // does.
-      if (!Name.consume_front(ESIMD_INTRIN_PREF0))
+      if (!Name.consume_front(ESIMD_INTRIN_PREF0) && !isDevicelibFunction(Name))
         continue;
       // now skip the digits
       Name = Name.drop_while([](char C) { return std::isdigit(C); });
@@ -1771,7 +1843,8 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
       assert(!Name.startswith("__sycl_set_kernel_properties") &&
              "__sycl_set_kernel_properties must have been lowered");
 
-      if (Name.empty() || !Name.startswith(ESIMD_INTRIN_PREF1))
+      if (Name.empty() ||
+          (!Name.startswith(ESIMD_INTRIN_PREF1) && !isDevicelibFunction(Name)))
         continue;
       // this is ESIMD intrinsic - record for later translation
       ESIMDIntrCalls.push_back(CI);

@@ -0,0 +1,61 @@
+// REQUIRES: gpu
+// UNSUPPORTED: gpu-intel-gen9 || cuda || hip
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// XFAIL: gpu && !esimd_emulator
+//==- bfloat16Constructor.cpp - Test to verify use of bfloat16 constructor -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This is a basic test to verify the use of the bfloat16 constructor in a kernel.
+// TODO: Enable the test once the GPU RT supporting the functionality reaches
+// the CI
+
+#include <CL/sycl.hpp>
+#include <ext/intel/esimd.hpp>
+#include <iostream>
+
+using namespace sycl;
+
+// Fills a shared buffer with a non-zero value, runs one ESIMD work-item that
+// constructs a bfloat16 vector from bf16(0), converts it to float and stores
+// it to the buffer, then checks on the host that every element became 0.
+// Returns 0 when the check passes and 1 otherwise, so a wrong result is
+// visible through the exit status.
+int main() {
+  constexpr unsigned Size = 32;
+  constexpr unsigned VL = 32;
+  constexpr unsigned GroupSize = 1;
+
+  queue q;
+  auto dev = q.get_device();
+  std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
+  // malloc_shared<T> takes an element count, not a byte count.
+  auto *C = malloc_shared<float>(Size, dev, q.get_context());
+
+  // Pre-fill with a value different from the expected result so that a kernel
+  // that writes nothing is detected.
+  for (unsigned i = 0; i != Size; i++) {
+    C[i] = 7;
+  }
+
+  nd_range<1> Range(range<1>(Size / VL), range<1>(GroupSize));
+
+  auto e = q.submit([&](handler &cgh) {
+    cgh.parallel_for<class Test>(Range, [=](nd_item<1> i) SYCL_ESIMD_KERNEL {
+      using bf16 = sycl::ext::oneapi::bfloat16;
+      using namespace __ESIMD_NS;
+      using namespace __ESIMD_ENS;
+      simd<bf16, 32> data_bf16 = bf16(0);
+      simd<float, 32> data = data_bf16;
+      lsc_block_store<float, 32>(C, data);
+    });
+  });
+  e.wait();
+  bool Pass = true;
+  for (unsigned i = 0; i != Size; i++) {
+    if (C[i] != 0) {
+      Pass = false;
+    }
+  }
+
+  free(C, q);
+  std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
+  return Pass ? 0 : 1;
+}
@@ -44,7 +44,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void bf16_scalar() {
   // The actual support in GPU RT is on the way though.
   float F32_scalar = 1;
   bfloat16 BF16_scalar = F32_scalar;
-  // CHECK: call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{[^)]+}})
+  // CHECK: call i16 @__spirv_ConvertFToBF16INTEL(float {{[^)]+}})
   float F32_scalar_conv = BF16_scalar;
-  // CHECK: call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{[^)]+}})
+  // CHECK: call float @__spirv_ConvertBF16ToFINTEL(i16 {{[^)]+}})
 }