@@ -23,6 +23,7 @@ SPDX-License-Identifier: MIT
23
23
#include " llvm/Linker/Linker.h"
24
24
#include " llvm/Support/SourceMgr.h"
25
25
#include " llvm/IRReader/IRReader.h"
26
+ #include " llvm/Transforms/Utils/BasicBlockUtils.h"
26
27
#include " common/LLVMWarningsPop.hpp"
27
28
#include " AdaptorCommon/ImplicitArgs.hpp"
28
29
#include " AdaptorCommon/AddImplicitArgs.hpp"
@@ -596,6 +597,7 @@ inline bool isPrecompiledEmulationFunction(Function* func)
596
597
func->getName ().contains (" precompiled_u32divrem" ) ||
597
598
func->getName ().contains (" precompiled_s32divrem_sp" ) ||
598
599
func->getName ().contains (" precompiled_u32divrem_sp" ) ||
600
+ func->getName ().contains (" precompiled_convert_f64_to_f16" ) ||
599
601
func->getName ().contains (" __igcbuiltin_sp_div" ) ||
600
602
func->getName ().contains (" __igcbuiltin_dp_div_nomadm_ieee" );
601
603
}
@@ -765,44 +767,29 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
765
767
{
766
768
createIntrinsicCall (CI, GenISAIntrinsic::GenISA_fma_rtn);
767
769
}
768
- }
769
- }
770
- }
771
- }
772
- }
773
- }
774
-
775
- // post-process the Int32 precompiled emulation function for div/rem
776
- if (isI32DivRem () || isI32DivRemSP () || isSPDiv ())
777
- {
778
- for (auto FI = M.begin (), FE = M.end (); FI != FE; )
779
- {
780
- llvm::Function* func = &(*FI);
781
- ++FI;
782
- if (isPrecompiledEmulationFunction (func))
783
- {
784
- for (auto BBI = func->begin (), BBE = func->end (); BBI != BBE; )
785
- {
786
- llvm::BasicBlock* BB = &(*BBI);
787
- ++BBI;
788
- for (auto I = BB->begin (), IE = BB->end (); I != IE; I++)
789
- {
790
- if (CallInst * CI = dyn_cast<CallInst>(I))
791
- {
792
- if (Function* calledFunc = CI->getCalledFunction ())
770
+ else if (calledFunc->getName ().startswith (" GenISA_add_rte" ))
771
+ {
772
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rte);
773
+ }
774
+ else if (calledFunc->getName ().startswith (" GenISA_add_rtz" ))
793
775
{
794
- if (calledFunc->getName ().startswith (" GenISA_mul_rtz" ))
795
- {
796
- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_mul_rtz);
797
- }
798
- else if (calledFunc->getName ().startswith (" GenISA_add_rtz" ))
799
- {
800
- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtz);
801
- }
802
- else if (calledFunc->getName ().startswith (" GenISA_uitof_rtz" ))
803
- {
804
- createIntrinsicCall (CI, GenISAIntrinsic::GenISA_uitof_rtz);
805
- }
776
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtz);
777
+ }
778
+ else if (calledFunc->getName ().startswith (" GenISA_add_rtn" ))
779
+ {
780
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtn);
781
+ }
782
+ else if (calledFunc->getName ().startswith (" GenISA_add_rtp" ))
783
+ {
784
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_add_rtp);
785
+ }
786
+ else if (calledFunc->getName ().startswith (" GenISA_mul_rtz" ))
787
+ {
788
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_mul_rtz);
789
+ }
790
+ else if (calledFunc->getName ().startswith (" GenISA_uitof_rtz" ))
791
+ {
792
+ createIntrinsicCall (CI, GenISAIntrinsic::GenISA_uitof_rtz);
806
793
}
807
794
}
808
795
}
@@ -1553,12 +1540,9 @@ void PreCompiledFuncImport::visitFPTruncInst(llvm::FPTruncInst& inst)
1553
1540
m_pModule);
1554
1541
}
1555
1542
1556
- CallInst* funcCall = CallInst::Create (func, inst.getOperand (0 ), inst. getName (), &inst );
1543
+ CallInst* funcCall = CallInst::Create (func, inst.getOperand (0 ));
1557
1544
addCallInst (funcCall);
1558
- funcCall->setDebugLoc (inst.getDebugLoc ());
1559
-
1560
- inst.replaceAllUsesWith (funcCall);
1561
- inst.eraseFromParent ();
1545
+ ReplaceInstWithInst (&inst, funcCall);
1562
1546
1563
1547
m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true ;
1564
1548
m_changed = true ;
@@ -2307,11 +2291,8 @@ As a result, we reduce 2x necessary work
2307
2291
m_CallRemDiv.push_back (&I);
2308
2292
}
2309
2293
2310
- if (!isDPEmu () && !isDPDivSqrtEmu ()) {
2311
- return ;
2312
- }
2313
-
2314
- if (resTy->isDoubleTy () &&
2294
+ if ((isDPEmu () || isDPDivSqrtEmu ()) &&
2295
+ resTy->isDoubleTy () &&
2315
2296
(II && II->getIntrinsicID () == Intrinsic::sqrt))
2316
2297
{
2317
2298
FunctionIDs sqrtType = FUNCTION_DP_SQRT;
@@ -2346,12 +2327,8 @@ As a result, we reduce 2x necessary work
2346
2327
return ;
2347
2328
}
2348
2329
2349
- if (!isDPEmu ()) {
2350
- return ;
2351
- }
2352
-
2353
2330
// llvm.fma.f64
2354
- if (resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fma)
2331
+ if (isDPEmu () && resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fma)
2355
2332
{
2356
2333
Function* newFunc = getOrCreateFunction (FUNCTION_DP_FMA);
2357
2334
Function* CurrFunc = I.getParent ()->getParent ();
@@ -2378,7 +2355,7 @@ As a result, we reduce 2x necessary work
2378
2355
// llvm.fma.rtn.f64
2379
2356
// llvm.fma.rtp.f64
2380
2357
// llvm.fma.rtz.f64
2381
- if (resTy->isDoubleTy () && GII &&
2358
+ if (isDPEmu () && resTy->isDoubleTy () && GII &&
2382
2359
(GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtn ||
2383
2360
GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtp ||
2384
2361
GII->getIntrinsicID () == GenISAIntrinsic::GenISA_fma_rtz))
@@ -2421,8 +2398,52 @@ As a result, we reduce 2x necessary work
2421
2398
return ;
2422
2399
}
2423
2400
2401
+ if ((isFP64toFP16 () || isDPEmu () || isDPConvEmu ()) &&
2402
+ GII &&
2403
+ (GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rte ||
2404
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtz ||
2405
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtn ||
2406
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtp) &&
2407
+ resTy->isHalfTy () && GII->getOperand (0 )->getType ()->isDoubleTy ())
2408
+ {
2409
+ const StringRef funcName =
2410
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rte ?
2411
+ " __precompiled_convert_f64_to_f16_rte" :
2412
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtz ?
2413
+ " __precompiled_convert_f64_to_f16_rtz" :
2414
+ GII->getIntrinsicID () == GenISAIntrinsic::GenISA_ftof_rtn ?
2415
+ " __precompiled_convert_f64_to_f16_rtn" :
2416
+ " __precompiled_convert_f64_to_f16_rtp" ;
2417
+ Function* func = m_pModule->getFunction (funcName);
2418
+
2419
+ // Try to look up the function in the module's symbol
2420
+ // table first, else add it.
2421
+ if (func == NULL )
2422
+ {
2423
+ FunctionType* FuncIntrType = FunctionType::get (
2424
+ resTy,
2425
+ I.getOperand (0 )->getType (),
2426
+ false );
2427
+
2428
+ func = Function::Create (
2429
+ FuncIntrType,
2430
+ GlobalValue::ExternalLinkage,
2431
+ funcName,
2432
+ m_pModule);
2433
+ }
2434
+
2435
+ CallInst* funcCall = CallInst::Create (func, GII->getOperand (0 ));
2436
+ ReplaceInstWithInst (GII, funcCall);
2437
+ addCallInst (funcCall);
2438
+
2439
+ m_libModuleToBeImported[LIBMOD_INT_DIV_REM] = true ;
2440
+ m_changed = true ;
2441
+ m_pCtx->metrics .StatEndEmuFunc (funcCall);
2442
+ return ;
2443
+ }
2444
+
2424
2445
// llvm.fabs.f64
2425
- if (resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fabs)
2446
+ if (isDPEmu () && resTy->isDoubleTy () && II && II->getIntrinsicID () == Intrinsic::fabs)
2426
2447
{
2427
2448
// bit 63 is sign bit, set it to zero. Don't use int64.
2428
2449
VectorType* vec2Ty = IGCLLVM::FixedVectorType::get (intTy, 2 );
0 commit comments