Skip to content

Commit 9dec1d0

Browse files
vmustya authored and igcbot committed
Do not bale bfloat fpext into saturate intrinsic in VC
Hardware doesn't allow the saturate modifier to be used with bfloat mov instructions, so VC should emit a separate float `mov.sat`.
1 parent 006dbab commit 9dec1d0

File tree

4 files changed

+42
-1
lines changed

4 files changed

+42
-1
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXBaling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ void GenXBaling::processSat(Instruction *Inst) {
921921
setOperandBaled(Inst, OperandNum, &BI);
922922
} else if (ValIntrinID == GenXIntrinsic::not_any_intrinsic) {
923923
if (isa<BinaryOperator>(V) || isa<SelectInst>(V) ||
924-
(isa<CastInst>(V) && !isa<BitCastInst>(V))) {
924+
(isa<CastInst>(V) && !isa<BitCastInst>(V) && !isBFloat16Cast(V))) {
925925
LLVM_DEBUG(llvm::dbgs()
926926
<< __FUNCTION__ << " setting operand #" << OperandNum
927927
<< " to bale in instruction " << *Inst << "\n");

IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,22 @@ bool genx::isNoopCast(const CastInst *CI) {
407407
}
408408
}
409409

410+
// isBFloat16Cast : test whether instruction I is a floating-point cast that
// involves bfloat on its narrow side.
//
// Returns true when I is an FPTruncInst whose result type is bfloat, or an
// FPExtInst whose source operand type is bfloat. The check is done on the
// scalar type, so vectors of bfloat are covered as well. Any other instruction
// yields false. When built against LLVM older than 11 the function always
// returns false.
bool genx::isBFloat16Cast(const Instruction *I) {
411+
#if LLVM_VERSION_MAJOR >= 11
412+
// Type on the (potentially) bfloat side of the cast.
Type *Ty = nullptr;
413+
if (isa<FPTruncInst>(I))
414+
// Truncation: the destination (result) type is the narrow one.
Ty = I->getType();
415+
else if (isa<FPExtInst>(I))
416+
// Extension: the source operand type is the narrow one.
Ty = I->getOperand(0)->getType();
417+
else
418+
// Not an fptrunc/fpext at all.
return false;
419+
// Ty was assigned on both paths that reach this point.
IGC_ASSERT_EXIT(Ty);
420+
// getScalarType() looks through vector types to the element type.
return Ty->getScalarType()->isBFloatTy();
421+
#else // LLVM_VERSION_MAJOR >= 11
422+
// NOTE(review): presumably the bfloat type is unavailable before LLVM 11,
// so no cast can involve it - confirm.
return false;
423+
#endif // LLVM_VERSION_MAJOR >= 11
424+
}
425+
410426
/***********************************************************************
411427
* ShuffleVectorAnalyzer::getAsSlice : see if the shufflevector is a slice on
412428
* operand 0, and if so return the start index, or -1 if it is not a slice

IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,9 @@ bool isFuncPointerVec(Value *V);
252252
// of value (in other words, it can be copy-coalesced).
253253
bool isNoopCast(const CastInst *CI);
254254

255+
// isBFloat16Cast : test if cast operation extends bfloat or truncates to bfloat
256+
bool isBFloat16Cast(const Instruction *I);
257+
255258
// ShuffleVectorAnalyzer : class to analyze a shufflevector
256259
class ShuffleVectorAnalyzer {
257260
ShuffleVectorInst *SI;
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm_12_or_greater
10+
; RUN: %opt %use_old_pass_manager% -GenXFuncBaling -print-baling-info -disable-output -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
11+
12+
declare <16 x float> @llvm.genx.sat.v16f32(<16 x float>)
13+
14+
; test_ext : saturate applied directly to the result of an fpext from
; <16 x bfloat> to <16 x float>.
; The CHECK lines require that the printed bale entry for %sat ends with
; " saturate" and is immediately followed by "GenXBaling dump end".
; NOTE(review): presumably this means the fpext operand is not reported as
; baled into the saturate - confirm against the -print-baling-info format.
define <16 x float> @test_ext(<16 x bfloat> %src) {
15+
; CHECK: bales in function: test_ext:
16+
; CHECK-NEXT: %sat = call <16 x float> @llvm.genx.sat.v16f32(<16 x float> %ext):
17+
; CHECK-SAME: {{ saturate$}}
18+
; CHECK-NEXT: GenXBaling dump end
19+
%ext = fpext <16 x bfloat> %src to <16 x float>
20+
%sat = call <16 x float> @llvm.genx.sat.v16f32(<16 x float> %ext)
21+
ret <16 x float> %sat
22+
}

0 commit comments

Comments
 (0)