Lower math intrinsics for bfloat in VC

vmustya · igcbot · commit 5e1bd3b7508c · 2023-12-28T00:18:25.000+01:00
.
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXBFloatLowering.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXBFloatLowering.cpp
@@ -72,6 +72,9 @@ class GenXBFloatLowering : public FunctionPass,
   void visitSIToFPInst(SIToFPInst &Inst);
   void visitUIToFPInst(UIToFPInst &Inst);
 
+  // lower intrinsic instructions
+  void visitCallInst(CallInst &Inst);
+
 private:
   void lowerCastToBFloat(CastInst &Inst);
   void lowerCastFromBFloat(CastInst &Inst);
@@ -127,7 +130,11 @@ void GenXBFloatLowering::visitBinaryOperator(BinaryOperator &Inst) {
   Instruction::BinaryOps Opcode = Inst.getOpcode();
   auto *Op0Conv = Builder.CreateFPExt(Src0, FloatTy);
   auto *Op1Conv = Builder.CreateFPExt(Src1, FloatTy);
-  auto *InstUpdate = Builder.CreateBinOp(Opcode, Op0Conv, Op1Conv);
+
+  auto *InstUpdate =
+      cast<Instruction>(Builder.CreateBinOp(Opcode, Op0Conv, Op1Conv));
+  InstUpdate->setFastMathFlags(Inst.getFastMathFlags());
+
   auto *Trunc = Builder.CreateFPTrunc(InstUpdate, Ty);
   Inst.replaceAllUsesWith(Trunc);
   Inst.eraseFromParent();
@@ -195,6 +202,70 @@ void GenXBFloatLowering::visitUIToFPInst(UIToFPInst &Inst) {
   lowerCastToBFloat(Inst);
 }
 
+void GenXBFloatLowering::visitCallInst(CallInst &Inst) {
+  auto IID = vc::getAnyIntrinsicID(&Inst);
+  auto *Ty = Inst.getType(); // type tested for bfloat below; result type by default
+  SmallVector<Type *, 2> Types; // types handed to vc::getAnyDeclaration
+  // Re-emit the intrinsics listed below on ExtTy when they operate on bfloat.
+  switch (IID) {
+  default:
+    return; // every other call is left untouched
+  case GenXIntrinsic::genx_sat:
+    break;
+  case Intrinsic::cos:
+  case Intrinsic::exp2:
+  case Intrinsic::fabs:
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
+  case Intrinsic::log2:
+  case Intrinsic::maximum:
+  case Intrinsic::maxnum:
+  case Intrinsic::minimum:
+  case Intrinsic::minnum:
+  case Intrinsic::pow:
+  case Intrinsic::sin:
+  case Intrinsic::sqrt:
+    break;
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
+    Types.push_back(Ty); // the result type is pushed first and kept as is
+    Ty = Inst.getArgOperand(0)->getType(); // test the source operand instead
+    break;
+  }
+  // Nothing to lower unless the tested type is bfloat or a vector of bfloat.
+  if (!Ty->getScalarType()->isBFloatTy())
+    return;
+
+  LLVM_DEBUG(dbgs() << "GenXBFloatLowering: apply on Intrinsic\n"
+                    << Inst << "\n");
+  // NOTE(review): ExtTy is presumably the float type with the same shape as Ty.
+  auto *ExtTy = getFloatTyFromBfloat(Ty);
+  Types.push_back(ExtTy);
+  // New instructions go before Inst; its FMF are copied to the builder if any.
+  IRBuilder<> Builder(&Inst);
+  if (isa<FPMathOperator>(Inst))
+    Builder.setFastMathFlags(Inst.getFastMathFlags());
+  // Extend bfloat arguments to ExtTy; every other argument is passed through.
+  SmallVector<Value *, 4> Args;
+  llvm::transform(Inst.args(), std::back_inserter(Args),
+                  [&Builder, ExtTy](Value *Arg) {
+                    auto *Ty = Arg->getType(); // shadows the outer Ty
+                    if (!Ty->getScalarType()->isBFloatTy())
+                      return Arg;
+                    return Builder.CreateFPExt(Arg, ExtTy);
+                  });
+  // Look up the declaration for IID with the collected Types and call it.
+  auto *Func = vc::getAnyDeclaration(Inst.getModule(), IID, Types);
+  Value *NewInst = Builder.CreateCall(Func, Args);
+  // A float result is truncated back to the type of the original call.
+  if (NewInst->getType()->getScalarType()->isFloatTy())
+    NewInst = Builder.CreateFPTrunc(NewInst, Inst.getType());
+
+  Inst.replaceAllUsesWith(NewInst);
+  Inst.eraseFromParent();
+  Modify = true; // NOTE(review): presumably the pass's "IR changed" flag
+}
+
 void GenXBFloatLowering::lowerCastToBFloat(CastInst &Inst) {
   auto *ResTy = Inst.getType();
   if (!ResTy->getScalarType()->isBFloatTy())
diff --git a/IGC/VectorCompiler/test/GenXBFloatLowering/intrinsics.ll b/IGC/VectorCompiler/test/GenXBFloatLowering/intrinsics.ll
@@ -0,0 +1,198 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; REQUIRES: llvm_12_or_greater
+; RUN: %opt %use_old_pass_manager% -GenXBFloatLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
+
+declare <8 x bfloat> @llvm.genx.sat.v8bf16(<8 x bfloat>)
+
+declare <8 x bfloat> @llvm.cos.v8bf16(<8 x bfloat>)
+declare <8 x bfloat> @llvm.exp2.v8bf16(<8 x bfloat>)
+declare <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat>)
+declare <8 x bfloat> @llvm.log2.v8bf16(<8 x bfloat>)
+declare <8 x bfloat> @llvm.sin.v8bf16(<8 x bfloat>)
+declare <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat>)
+
+declare <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <8 x bfloat> @llvm.pow.v8bf16(<8 x bfloat>, <8 x bfloat>)
+
+declare <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>)
+declare <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>)
+
+declare <8 x i32> @llvm.fptosi.sat.v8i32.v8bf16(<8 x bfloat>)
+declare <8 x i32> @llvm.fptoui.sat.v8i32.v8bf16(<8 x bfloat>)
+
+; CHECK-LABEL: test_sat
+define <8 x bfloat> @test_sat(<8 x bfloat> %src) { ; genx.sat on bfloat: expected as fpext, genx.sat on <8 x float>, fptrunc
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.genx.sat.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.genx.sat.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_cos
+define <8 x bfloat> @test_cos(<8 x bfloat> %src) { ; the input call carries 'fast'; the expected float call carries it as well
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call fast <8 x float> @llvm.cos.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call fast <8 x bfloat> @llvm.cos.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_exp2
+define <8 x bfloat> @test_exp2(<8 x bfloat> %src) { ; llvm.exp2 on bfloat: expected as fpext, llvm.exp2 on <8 x float>, fptrunc
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.exp2.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.exp2.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_fabs
+define <8 x bfloat> @test_fabs(<8 x bfloat> %src) { ; llvm.fabs on bfloat: expected as fpext, llvm.fabs on <8 x float>, fptrunc
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_log2
+define <8 x bfloat> @test_log2(<8 x bfloat> %src) { ; the input call carries 'afn'; the expected float call carries it as well
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call afn <8 x float> @llvm.log2.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call afn <8 x bfloat> @llvm.log2.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_sin
+define <8 x bfloat> @test_sin(<8 x bfloat> %src) { ; llvm.sin on bfloat: expected as fpext, llvm.sin on <8 x float>, fptrunc
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.sin.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.sin.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_sqrt
+define <8 x bfloat> @test_sqrt(<8 x bfloat> %src) { ; llvm.sqrt on bfloat: expected as fpext, llvm.sqrt on <8 x float>, fptrunc
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[EXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %src)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_maximum
+define <8 x bfloat> @test_maximum(<8 x bfloat> %a, <8 x bfloat> %b) { ; two operands: each is expected to get its own fpext before the float call
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_maxnum
+define <8 x bfloat> @test_maxnum(<8 x bfloat> %a, <8 x bfloat> %b) { ; llvm.maxnum on bfloat: both operands extended, float call, fptrunc
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_minimum
+define <8 x bfloat> @test_minimum(<8 x bfloat> %a, <8 x bfloat> %b) { ; llvm.minimum on bfloat: both operands extended, float call, fptrunc
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_minnum
+define <8 x bfloat> @test_minnum(<8 x bfloat> %a, <8 x bfloat> %b) { ; llvm.minnum on bfloat: both operands extended, float call, fptrunc
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_pow
+define <8 x bfloat> @test_pow(<8 x bfloat> %a, <8 x bfloat> %b) { ; llvm.pow on bfloat: both operands extended, float call, fptrunc
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.pow.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.pow.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_fma
+define <8 x bfloat> @test_fma(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; three operands: each is expected to get its own fpext before the float call
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[CEXT:%[^ ]+]] = fpext <8 x bfloat> %c to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]], <8 x float> [[CEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_fmuladd
+define <8 x bfloat> @test_fmuladd(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; llvm.fmuladd on bfloat: all three operands extended, float call, fptrunc
+  ; CHECK: [[AEXT:%[^ ]+]] = fpext <8 x bfloat> %a to <8 x float>
+  ; CHECK: [[BEXT:%[^ ]+]] = fpext <8 x bfloat> %b to <8 x float>
+  ; CHECK: [[CEXT:%[^ ]+]] = fpext <8 x bfloat> %c to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[AEXT]], <8 x float> [[BEXT]], <8 x float> [[CEXT]])
+  ; CHECK: [[TRUNC:%[^ ]+]] = fptrunc <8 x float> [[RES]] to <8 x bfloat>
+  ; CHECK: ret <8 x bfloat> [[TRUNC]]
+  %res = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
+  ret <8 x bfloat> %res
+}
+
+; CHECK-LABEL: test_fptosi_sat
+define <8 x i32> @test_fptosi_sat(<8 x bfloat> %src) { ; integer result: only the source is extended, and no fptrunc is expected
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[EXT]])
+  ; CHECK: ret <8 x i32> [[RES]]
+  %res = call <8 x i32> @llvm.fptosi.sat.v8i32.v8bf16(<8 x bfloat> %src)
+  ret <8 x i32> %res
+}
+
+; CHECK-LABEL: test_fptoui_sat
+define <8 x i32> @test_fptoui_sat(<8 x bfloat> %src) { ; integer result: only the source is extended, and no fptrunc is expected
+  ; CHECK: [[EXT:%[^ ]+]] = fpext <8 x bfloat> %src to <8 x float>
+  ; CHECK: [[RES:%[^ ]+]] = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> [[EXT]])
+  ; CHECK: ret <8 x i32> [[RES]]
+  %res = call <8 x i32> @llvm.fptoui.sat.v8i32.v8bf16(<8 x bfloat> %src)
+  ret <8 x i32> %res
+}