Skip to content

Commit d9d8a53

Browse files
krystian-andrzejewskiigcbot
authored and committed
Support more conversion intrinsics in PreCompiledFuncImport
This change allows the compiler to emulate conversions from fp64 to fp16 using a non-default rounding mode. Note that the emulation requires the full set of addition operations with non-default rounding modes. In addition, other IGC intrinsics are introduced for completeness only.
1 parent 00d5960 commit d9d8a53

File tree

7 files changed

+6621
-6032
lines changed

7 files changed

+6621
-6032
lines changed

IGC/Compiler/Builtins/PreCompiledFuncLibrary.hpp

Lines changed: 6309 additions & 5930 deletions
Large diffs are not rendered by default.

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 23 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -9147,17 +9147,20 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
91479147
true);
91489148
m_encoder->Push();
91499149
break;
9150-
}
9150+
}
91519151
case GenISAIntrinsic::GenISA_mul_rtz:
9152-
case GenISAIntrinsic::GenISA_fma_rtz:
9152+
case GenISAIntrinsic::GenISA_mul_rte:
9153+
case GenISAIntrinsic::GenISA_mul_rtp:
9154+
case GenISAIntrinsic::GenISA_mul_rtn:
91539155
case GenISAIntrinsic::GenISA_add_rtz:
9154-
emitFPOrtz(inst);
9155-
break;
9156+
case GenISAIntrinsic::GenISA_add_rte:
9157+
case GenISAIntrinsic::GenISA_add_rtp:
9158+
case GenISAIntrinsic::GenISA_add_rtn:
9159+
case GenISAIntrinsic::GenISA_fma_rtz:
9160+
case GenISAIntrinsic::GenISA_fma_rte:
91569161
case GenISAIntrinsic::GenISA_fma_rtp:
9157-
emitFMArtp(inst);
9158-
break;
91599162
case GenISAIntrinsic::GenISA_fma_rtn:
9160-
emitFMArtn(inst);
9163+
emitFPOWithNonDefaultRoundingMode(inst);
91619164
break;
91629165
case GenISAIntrinsic::GenISA_CatchAllDebugLine:
91639166
emitDebugPlaceholder(inst);
@@ -16942,8 +16945,8 @@ void EmitPass::emitfitof(llvm::GenIntrinsicInst* inst)
1694216945
ResetRoundingMode(inst);
1694316946
}
1694416947

16945-
// Emit FP Operations (FPO) using round-to-zero (rtz)
16946-
void EmitPass::emitFPOrtz(llvm::GenIntrinsicInst* inst)
16948+
// Emit FP Operations (FPO) using a non-default rounding mode
16949+
void EmitPass::emitFPOWithNonDefaultRoundingMode(llvm::GenIntrinsicInst* inst)
1694716950
{
1694816951
IGC_ASSERT_MESSAGE(IGCLLVM::getNumArgOperands(inst) >= 2, "ICE: incorrect gen intrinsic");
1694916952

@@ -16952,22 +16955,31 @@ void EmitPass::emitFPOrtz(llvm::GenIntrinsicInst* inst)
1695216955
CVariable* src1 = GetSymbol(inst->getOperand(1));
1695316956
CVariable* dst = m_destination;
1695416957

16955-
SetRoundingMode_FP(ERoundingMode::ROUND_TO_ZERO);
16958+
SetRoundingMode_FP(GetRoundingMode_FP(m_pCtx->getModuleMetaData(), inst));
1695616959

1695716960
switch (GID)
1695816961
{
1695916962
default:
1696016963
IGC_ASSERT_MESSAGE(0, "ICE: unexpected Gen Intrinsic");
1696116964
break;
1696216965
case GenISAIntrinsic::GenISA_mul_rtz:
16966+
case GenISAIntrinsic::GenISA_mul_rte:
16967+
case GenISAIntrinsic::GenISA_mul_rtp:
16968+
case GenISAIntrinsic::GenISA_mul_rtn:
1696316969
m_encoder->Mul(dst, src0, src1);
1696416970
m_encoder->Push();
1696516971
break;
16966-
case GenISAIntrinsic::GenISA_add_rtz:
16972+
case GenISAIntrinsic::GenISA_add_rtz:
16973+
case GenISAIntrinsic::GenISA_add_rte:
16974+
case GenISAIntrinsic::GenISA_add_rtp:
16975+
case GenISAIntrinsic::GenISA_add_rtn:
1696716976
m_encoder->Add(dst, src0, src1);
1696816977
m_encoder->Push();
1696916978
break;
1697016979
case GenISAIntrinsic::GenISA_fma_rtz:
16980+
case GenISAIntrinsic::GenISA_fma_rte:
16981+
case GenISAIntrinsic::GenISA_fma_rtp:
16982+
case GenISAIntrinsic::GenISA_fma_rtn:
1697116983
{
1697216984
CVariable* src2 = GetSymbol(inst->getOperand(2));
1697316985
m_encoder->Mad(dst, src0, src1, src2);
@@ -16979,40 +16991,6 @@ void EmitPass::emitFPOrtz(llvm::GenIntrinsicInst* inst)
1697916991
ResetRoundingMode(inst);
1698016992
}
1698116993

16982-
// Emit FP mad (FMA) using round-to-positive-infinity (rtp)
16983-
void EmitPass::emitFMArtp(llvm::GenIntrinsicInst *inst) {
16984-
IGC_ASSERT_MESSAGE(IGCLLVM::getNumArgOperands(inst) == 3, "ICE: incorrect gen intrinsic");
16985-
16986-
CVariable *src0 = GetSymbol(inst->getOperand(0));
16987-
CVariable *src1 = GetSymbol(inst->getOperand(1));
16988-
CVariable *src2 = GetSymbol(inst->getOperand(2));
16989-
CVariable *dst = m_destination;
16990-
16991-
SetRoundingMode_FP(ERoundingMode::ROUND_TO_POSITIVE);
16992-
16993-
m_encoder->Mad(dst, src0, src1, src2);
16994-
m_encoder->Push();
16995-
16996-
ResetRoundingMode(inst);
16997-
}
16998-
16999-
// Emit FP mad (FMA) using round-to-negative-infinity (rtn)
17000-
void EmitPass::emitFMArtn(llvm::GenIntrinsicInst *inst) {
17001-
IGC_ASSERT_MESSAGE(IGCLLVM::getNumArgOperands(inst) == 3, "ICE: incorrect gen intrinsic");
17002-
17003-
CVariable *src0 = GetSymbol(inst->getOperand(0));
17004-
CVariable *src1 = GetSymbol(inst->getOperand(1));
17005-
CVariable *src2 = GetSymbol(inst->getOperand(2));
17006-
CVariable *dst = m_destination;
17007-
17008-
SetRoundingMode_FP(ERoundingMode::ROUND_TO_NEGATIVE);
17009-
17010-
m_encoder->Mad(dst, src0, src1, src2);
17011-
m_encoder->Push();
17012-
17013-
ResetRoundingMode(inst);
17014-
}
17015-
1701616994
void EmitPass::emitftoi(llvm::GenIntrinsicInst* inst)
1701716995
{
1701816996
IGC_ASSERT_MESSAGE(inst->getOperand(0)->getType()->isFloatingPointTy(), "Unsupported type");

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,9 +419,7 @@ class EmitPass : public llvm::FunctionPass
419419

420420
void emitf32tof16_rtz(llvm::GenIntrinsicInst* inst);
421421
void emitfitof(llvm::GenIntrinsicInst* inst);
422-
void emitFPOrtz(llvm::GenIntrinsicInst* inst);
423-
void emitFMArtp(llvm::GenIntrinsicInst* inst);
424-
void emitFMArtn(llvm::GenIntrinsicInst* inst);
422+
void emitFPOWithNonDefaultRoundingMode(llvm::GenIntrinsicInst* inst);
425423
void emitftoi(llvm::GenIntrinsicInst* inst);
426424
void emitCtlz(const SSource& source);
427425

IGC/Compiler/CISACodeGen/helper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,16 +2300,23 @@ namespace IGC
23002300
case GenISAIntrinsic::GenISA_ftof_rtn:
23012301
case GenISAIntrinsic::GenISA_itof_rtn:
23022302
case GenISAIntrinsic::GenISA_uitof_rtn:
2303+
case GenISAIntrinsic::GenISA_add_rtn:
2304+
case GenISAIntrinsic::GenISA_mul_rtn:
23032305
case GenISAIntrinsic::GenISA_fma_rtn:
23042306
RM = ERoundingMode::ROUND_TO_NEGATIVE;
23052307
break;
23062308
case GenISAIntrinsic::GenISA_ftof_rtp:
23072309
case GenISAIntrinsic::GenISA_itof_rtp:
23082310
case GenISAIntrinsic::GenISA_uitof_rtp:
2311+
case GenISAIntrinsic::GenISA_add_rtp:
2312+
case GenISAIntrinsic::GenISA_mul_rtp:
23092313
case GenISAIntrinsic::GenISA_fma_rtp:
23102314
RM = ERoundingMode::ROUND_TO_POSITIVE;
23112315
break;
23122316
case GenISAIntrinsic::GenISA_ftof_rte:
2317+
case GenISAIntrinsic::GenISA_add_rte:
2318+
case GenISAIntrinsic::GenISA_mul_rte:
2319+
case GenISAIntrinsic::GenISA_fma_rte:
23132320
RM = ERoundingMode::ROUND_TO_NEAREST_EVEN;
23142321
break;
23152322
case GenISAIntrinsic::GenISA_ftobf:

IGC/Compiler/Optimizer/PreCompiledFuncImport.cpp

Lines changed: 75 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ SPDX-License-Identifier: MIT
2323
#include "llvm/Linker/Linker.h"
2424
#include "llvm/Support/SourceMgr.h"
2525
#include "llvm/IRReader/IRReader.h"
26+
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
2627
#include "common/LLVMWarningsPop.hpp"
2728
#include "AdaptorCommon/ImplicitArgs.hpp"
2829
#include "AdaptorCommon/AddImplicitArgs.hpp"
@@ -596,6 +597,7 @@ inline bool isPrecompiledEmulationFunction(Function* func)
596597
func->getName().contains("precompiled_u32divrem") ||
597598
func->getName().contains("precompiled_s32divrem_sp") ||
598599
func->getName().contains("precompiled_u32divrem_sp") ||
600+
func->getName().contains("precompiled_convert_f64_to_f16") ||
599601
func->getName().contains("__igcbuiltin_sp_div") ||
600602
func->getName().contains("__igcbuiltin_dp_div_nomadm_ieee");
601603
}
@@ -765,44 +767,29 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
765767
{
766768
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_fma_rtn);
767769
}
768-
}
769-
}
770-
}
771-
}
772-
}
773-
}
774-
775-
//post-process the Int32 precompiled emulation function for div/rem
776-
if (isI32DivRem() || isI32DivRemSP() || isSPDiv())
777-
{
778-
for (auto FI = M.begin(), FE = M.end(); FI != FE; )
779-
{
780-
llvm::Function* func = &(*FI);
781-
++FI;
782-
if (isPrecompiledEmulationFunction(func))
783-
{
784-
for (auto BBI = func->begin(), BBE = func->end(); BBI != BBE; )
785-
{
786-
llvm::BasicBlock* BB = &(*BBI);
787-
++BBI;
788-
for (auto I = BB->begin(), IE = BB->end(); I != IE; I++)
789-
{
790-
if (CallInst * CI = dyn_cast<CallInst>(I))
791-
{
792-
if (Function* calledFunc = CI->getCalledFunction())
770+
else if (calledFunc->getName().startswith("GenISA_add_rte"))
771+
{
772+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rte);
773+
}
774+
else if (calledFunc->getName().startswith("GenISA_add_rtz"))
793775
{
794-
if (calledFunc->getName().startswith("GenISA_mul_rtz"))
795-
{
796-
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_mul_rtz);
797-
}
798-
else if (calledFunc->getName().startswith("GenISA_add_rtz"))
799-
{
800-
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rtz);
801-
}
802-
else if (calledFunc->getName().startswith("GenISA_uitof_rtz"))
803-
{
804-
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_uitof_rtz);
805-
}
776+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rtz);
777+
}
778+
else if (calledFunc->getName().startswith("GenISA_add_rtn"))
779+
{
780+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rtn);
781+
}
782+
else if (calledFunc->getName().startswith("GenISA_add_rtp"))
783+
{
784+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rtp);
785+
}
786+
else if (calledFunc->getName().startswith("GenISA_mul_rtz"))
787+
{
788+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_mul_rtz);
789+
}
790+
else if (calledFunc->getName().startswith("GenISA_uitof_rtz"))
791+
{
792+
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_uitof_rtz);
806793
}
807794
}
808795
}
@@ -1553,12 +1540,9 @@ void PreCompiledFuncImport::visitFPTruncInst(llvm::FPTruncInst& inst)
15531540
m_pModule);
15541541
}
15551542

1556-
CallInst* funcCall = CallInst::Create(func, inst.getOperand(0), inst.getName(), &inst);
1543+
CallInst* funcCall = CallInst::Create(func, inst.getOperand(0));
15571544
addCallInst(funcCall);
1558-
funcCall->setDebugLoc(inst.getDebugLoc());
1559-
1560-
inst.replaceAllUsesWith(funcCall);
1561-
inst.eraseFromParent();
1545+
ReplaceInstWithInst(&inst, funcCall);
15621546

15631547
m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true;
15641548
m_changed = true;
@@ -2307,11 +2291,8 @@ As a result, we reduce 2x necessary work
23072291
m_CallRemDiv.push_back(&I);
23082292
}
23092293

2310-
if (!isDPEmu() && !isDPDivSqrtEmu()) {
2311-
return;
2312-
}
2313-
2314-
if (resTy->isDoubleTy() &&
2294+
if ((isDPEmu() || isDPDivSqrtEmu()) &&
2295+
resTy->isDoubleTy() &&
23152296
(II && II->getIntrinsicID() == Intrinsic::sqrt))
23162297
{
23172298
FunctionIDs sqrtType = FUNCTION_DP_SQRT;
@@ -2346,12 +2327,8 @@ As a result, we reduce 2x necessary work
23462327
return;
23472328
}
23482329

2349-
if (!isDPEmu()) {
2350-
return;
2351-
}
2352-
23532330
// llvm.fma.f64
2354-
if (resTy->isDoubleTy() && II && II->getIntrinsicID() == Intrinsic::fma)
2331+
if (isDPEmu() && resTy->isDoubleTy() && II && II->getIntrinsicID() == Intrinsic::fma)
23552332
{
23562333
Function* newFunc = getOrCreateFunction(FUNCTION_DP_FMA);
23572334
Function* CurrFunc = I.getParent()->getParent();
@@ -2378,7 +2355,7 @@ As a result, we reduce 2x necessary work
23782355
// llvm.fma.rtn.f64
23792356
// llvm.fma.rtp.f64
23802357
// llvm.fma.rtz.f64
2381-
if (resTy->isDoubleTy() && GII &&
2358+
if (isDPEmu() && resTy->isDoubleTy() && GII &&
23822359
(GII->getIntrinsicID() == GenISAIntrinsic::GenISA_fma_rtn ||
23832360
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_fma_rtp ||
23842361
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_fma_rtz))
@@ -2421,8 +2398,52 @@ As a result, we reduce 2x necessary work
24212398
return;
24222399
}
24232400

2401+
if ((isFP64toFP16() || isDPEmu() || isDPConvEmu()) &&
2402+
GII &&
2403+
(GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rte ||
2404+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rtz ||
2405+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rtn ||
2406+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rtp) &&
2407+
resTy->isHalfTy() && GII->getOperand(0)->getType()->isDoubleTy())
2408+
{
2409+
const StringRef funcName =
2410+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rte ?
2411+
"__precompiled_convert_f64_to_f16_rte" :
2412+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rtz ?
2413+
"__precompiled_convert_f64_to_f16_rtz" :
2414+
GII->getIntrinsicID() == GenISAIntrinsic::GenISA_ftof_rtn ?
2415+
"__precompiled_convert_f64_to_f16_rtn" :
2416+
"__precompiled_convert_f64_to_f16_rtp";
2417+
Function* func = m_pModule->getFunction(funcName);
2418+
2419+
// Try to look up the function in the module's symbol
2420+
// table first, else add it.
2421+
if (func == NULL)
2422+
{
2423+
FunctionType* FuncIntrType = FunctionType::get(
2424+
resTy,
2425+
I.getOperand(0)->getType(),
2426+
false);
2427+
2428+
func = Function::Create(
2429+
FuncIntrType,
2430+
GlobalValue::ExternalLinkage,
2431+
funcName,
2432+
m_pModule);
2433+
}
2434+
2435+
CallInst* funcCall = CallInst::Create(func, GII->getOperand(0));
2436+
ReplaceInstWithInst(GII, funcCall);
2437+
addCallInst(funcCall);
2438+
2439+
m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true;
2440+
m_changed = true;
2441+
m_pCtx->metrics.StatEndEmuFunc(funcCall);
2442+
return;
2443+
}
2444+
24242445
// llvm.fabs.f64
2425-
if (resTy->isDoubleTy() && II && II->getIntrinsicID() == Intrinsic::fabs)
2446+
if (isDPEmu() && resTy->isDoubleTy() && II && II->getIntrinsicID() == Intrinsic::fabs)
24262447
{
24272448
// bit 63 is sign bit, set it to zero. Don't use int64.
24282449
VectorType* vec2Ty = IGCLLVM::FixedVectorType::get(intTy, 2);

0 commit comments

Comments
 (0)