Do not bale bfloat fpext into saturate intrinsic in VC

vmustya · igcbot · commit 9dec1d063dbb · 2023-12-27T23:55:29.000+01:00
The hardware doesn't allow the saturate modifier on bfloat mov instructions,
so VC must emit a separate float `mov.sat` instead of baling the bfloat cast.
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXBaling.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXBaling.cpp
@@ -921,7 +921,7 @@ void GenXBaling::processSat(Instruction *Inst) {
       setOperandBaled(Inst, OperandNum, &BI);
     } else if (ValIntrinID == GenXIntrinsic::not_any_intrinsic) {
       if (isa<BinaryOperator>(V) || isa<SelectInst>(V) ||
-          (isa<CastInst>(V) && !isa<BitCastInst>(V))) {
+          (isa<CastInst>(V) && !isa<BitCastInst>(V) && !isBFloat16Cast(V))) {
         LLVM_DEBUG(llvm::dbgs()
                    << __FUNCTION__ << " setting operand #" << OperandNum
                    << " to bale in instruction " << *Inst << "\n");
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp
@@ -407,6 +407,22 @@ bool genx::isNoopCast(const CastInst *CI) {
   }
 }
 
+bool genx::isBFloat16Cast(const Instruction *I) {
+#if LLVM_VERSION_MAJOR >= 11
+  Type *Ty = nullptr; // type to test for bfloat, chosen per cast kind
+  if (isa<FPTruncInst>(I))
+    Ty = I->getType(); // fptrunc: inspect the result type
+  else if (isa<FPExtInst>(I))
+    Ty = I->getOperand(0)->getType(); // fpext: inspect the source type
+  else
+    return false; // any other instruction is not a bfloat cast
+  IGC_ASSERT_EXIT(Ty);
+  return Ty->getScalarType()->isBFloatTy(); // scalar or vector of bfloat
+#else  // LLVM_VERSION_MAJOR >= 11
+  return false; // this configuration never reports a bfloat cast
+#endif // LLVM_VERSION_MAJOR >= 11
+}
+
 /***********************************************************************
  * ShuffleVectorAnalyzer::getAsSlice : see if the shufflevector is a slice on
  *    operand 0, and if so return the start index, or -1 if it is not a slice
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h
@@ -252,6 +252,9 @@ bool isFuncPointerVec(Value *V);
 // of value (in other words, it can be copy-coalesced).
 bool isNoopCast(const CastInst *CI);
 
+// isBFloat16Cast : test if I is an fpext from bfloat or an fptrunc to bfloat
+bool isBFloat16Cast(const Instruction *I);
+
 // ShuffleVectorAnalyzer : class to analyze a shufflevector
 class ShuffleVectorAnalyzer {
   ShuffleVectorInst *SI;
diff --git a/IGC/VectorCompiler/test/Baling/cast_sat_bfloat.ll b/IGC/VectorCompiler/test/Baling/cast_sat_bfloat.ll
@@ -0,0 +1,22 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; REQUIRES: llvm_12_or_greater
+; RUN: %opt %use_old_pass_manager% -GenXFuncBaling -print-baling-info -disable-output -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
+
+declare <16 x float> @llvm.genx.sat.v16f32(<16 x float>)
+
+define <16 x float> @test_ext(<16 x bfloat> %src) {
+; CHECK: bales in function: test_ext:
+; CHECK-NEXT:   %sat = call <16 x float> @llvm.genx.sat.v16f32(<16 x float> %ext):
+; CHECK-SAME: {{ saturate$}}
+; CHECK-NEXT: GenXBaling dump end
+  %ext = fpext <16 x bfloat> %src to <16 x float> ; bfloat fpext feeding the sat
+  %sat = call <16 x float> @llvm.genx.sat.v16f32(<16 x float> %ext) ; expected bale: saturate only
+  ret <16 x float> %sat
+}