@@ -17,7 +17,6 @@ SPDX-License-Identifier: MIT
17
17
#include " llvm/IR/InstIterator.h"
18
18
#include " llvm/Support/MemoryBuffer.h"
19
19
#include " llvm/Support/GenericDomTree.h"
20
- #include " llvm/Transforms/Utils/Cloning.h"
21
20
#include " llvm/Bitcode/BitcodeReader.h"
22
21
#include " llvm/Bitcode/BitcodeWriter.h"
23
22
#include " llvm/Linker/Linker.h"
@@ -633,7 +632,7 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
633
632
m_changed = false ;
634
633
635
634
// When we test it, we need to set emuKind
636
- if (IGC_GET_FLAG_VALUE (TestIGCPreCompiledFunctions) == 1 )
635
+ if (IGC_IS_FLAG_ENABLED (TestIGCPreCompiledFunctions))
637
636
{
638
637
m_emuKind = IGC_GET_FLAG_VALUE (ForceEmuKind) ? IGC_GET_FLAG_VALUE (ForceEmuKind) : EmuKind::EMU_DP;
639
638
checkAndSetEnableSubroutine ();
@@ -828,11 +827,12 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
828
827
}
829
828
}
830
829
831
- llvm::SmallVector<ImportedFunction, 32 > importedFunctions;
832
- unsigned totalNumberOfInlinedInst = 0 , totalNumberOfPotentiallyInlinedInst = 0 ;
830
+ unsigned totalNumberOfInlinedInst = 0 ;
833
831
int emuFC = (int )IGC_GET_FLAG_VALUE (EmulationFunctionControl);
834
832
835
- // Post processing, set those imported functions as internal linkage.
833
+ // Post processing, set those imported functions as internal linkage
834
+ // and alwaysinline. Also count how many instructions would be added
835
+ // to the shader if inlining occurred.
836
836
for (auto II = M.begin (), IE = M.end (); II != IE; )
837
837
{
838
838
Function* Func = &(*II);
@@ -854,101 +854,92 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
854
854
continue ;
855
855
}
856
856
857
- if (std::find (importedFunctions.begin (), importedFunctions.end (), Func) == importedFunctions.end ())
858
- importedFunctions.push_back (Func);
859
- }
860
- else
861
- {
862
- // Make sure original func isn't inlined accidentally.
857
+ // Remove noinline/AlwaysInline attr if present.
858
+ Func->removeFnAttr (llvm::Attribute::NoInline);
863
859
Func->removeFnAttr (llvm::Attribute::AlwaysInline);
864
- }
865
- }
866
-
867
- // Sort imported instructions in preferred inlining order.
868
- std::sort (importedFunctions.begin (), importedFunctions.end (), ImportedFunction::compare);
869
860
870
- // Post processing, set those imported functions as alwaysinline.
871
- // Also count how many instructions would be added to the shader
872
- // if inlining occurred.
873
- for (auto II = importedFunctions.begin (), IE = importedFunctions.end (); II != IE; ++II)
874
- {
875
- Function* Func = II->F ;
861
+ if (m_enableCallForEmulation &&
862
+ emuFC != FLAG_FCALL_DEFAULT &&
863
+ emuFC != FLAG_FCALL_FORCE_INLINE)
864
+ {
865
+ // Disable inlining completely.
866
+ continue ;
867
+ }
876
868
877
- // Remove noinline/AlwaysInline attr if present.
878
- Func->removeFnAttr (llvm::Attribute::NoInline);
879
- Func->removeFnAttr (llvm::Attribute::AlwaysInline);
869
+ if (Func->hasOneUse () || emuFC == FLAG_FCALL_FORCE_INLINE)
870
+ {
871
+ Func->addFnAttr (llvm::Attribute::AlwaysInline);
872
+ continue ;
873
+ }
880
874
881
- if (m_enableCallForEmulation &&
882
- emuFC != FLAG_FCALL_DEFAULT &&
883
- emuFC != FLAG_FCALL_FORCE_INLINE)
884
- {
885
- // Disable inlining completely.
886
- continue ;
887
- }
875
+ // Count number of instructions in the function
876
+ unsigned NumInst = 0 ;
877
+ for (BasicBlock& BB : Func->getBasicBlockList ()) {
878
+ NumInst += BB.getInstList ().size ();
879
+ }
888
880
889
- if (Func-> hasOneUse () || emuFC == FLAG_FCALL_FORCE_INLINE)
890
- {
891
- Func-> addFnAttr (llvm::Attribute::AlwaysInline);
892
- continue ;
893
- }
881
+ // Don't want to subroutine small functions
882
+ if (NumInst <= 5 )
883
+ {
884
+ // Add AlwaysInline attribute to force inlining all calls.
885
+ Func-> addFnAttr (llvm::Attribute::AlwaysInline);
894
886
895
- // Don't want to subroutine small functions
896
- if (II->funcInstructions <= 5 )
897
- {
898
- // Add AlwaysInline attribute to force inlining all calls.
899
- Func->addFnAttr (llvm::Attribute::AlwaysInline);
887
+ continue ;
888
+ }
900
889
901
- continue ;
890
+ totalNumberOfInlinedInst += NumInst * Func-> getNumUses () ;
902
891
}
903
-
904
- totalNumberOfPotentiallyInlinedInst += II->totalInstructions ;
905
-
906
- // If function fits in threshold, always inline.
907
- if (totalNumberOfInlinedInst + II->totalInstructions <= (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold))
892
+ else
908
893
{
909
- totalNumberOfInlinedInst += II-> totalInstructions ;
910
- Func->addFnAttr (llvm::Attribute::AlwaysInline);
894
+ // Make sure original func isn't inlined accidentally.
895
+ Func->removeFnAttr (llvm::Attribute::AlwaysInline);
911
896
}
912
897
}
913
898
914
- // Check if more functions can fit in threshold if they would be split into inline/noinline copies.
915
- if (m_enableCallForEmulation && emuFC == FLAG_FCALL_DEFAULT && totalNumberOfInlinedInst < ( unsigned ) IGC_GET_FLAG_VALUE (InlinedEmulationThreshold))
916
- {
917
- for ( auto II = importedFunctions. begin (); II != importedFunctions. end (); ++II)
918
- {
919
- Function* Func = II-> F ;
920
-
921
- if (Func-> hasFnAttribute (llvm::Attribute::AlwaysInline))
922
- continue ;
923
-
924
- unsigned calls = (( unsigned ) IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) - totalNumberOfInlinedInst) / II-> funcInstructions ;
925
- if (calls > 0 )
926
- {
927
- // Split function into inline/no-inline copies.
928
- ImportedFunction copy = createInlinedCopy (*II, calls);
929
- importedFunctions. push_back (copy);
930
- totalNumberOfInlinedInst += copy. totalInstructions ;
931
- }
899
+ // If true, it is a slow version of DP emu functions. Those functions
900
+ // are the original ones for just passing conformance, not for perf.
901
+ auto isSlowDPEmuFunc = [](Function* F) {
902
+ StringRef FN = F-> getName ();
903
+ if (FN. equals ( " __igcbuiltin_dp_add " ) ||
904
+ FN. equals ( " __igcbuiltin_dp_sub " ) ||
905
+ FN. equals ( " __igcbuiltin_dp_fma " ) ||
906
+ FN. equals ( " __igcbuiltin_dp_mul " ) ||
907
+ FN. equals ( " __igcbuiltin_dp_div " ) ||
908
+ FN. equals ( " __igcbuiltin_dp_cmp " ) ||
909
+ FN. equals ( " __igcbuiltin_dp_to_int32 " ) ||
910
+ FN. equals ( " __igcbuiltin_dp_to_uint32 " ) ||
911
+ FN. equals ( " __igcbuiltin_int32_to_dp " ) ||
912
+ FN. equals ( " __igcbuiltin_uint32_to_dp " ) ||
913
+ FN. equals ( " __igcbuiltin_dp_to_sp " ) ||
914
+ FN. equals ( " __igcbuiltin_sp_to_dp " ) ||
915
+ FN. equals ( " __igcbuiltin_dp_sqrt " )) {
916
+ return true ;
932
917
}
933
- }
918
+ return false ;
919
+ };
934
920
935
- for (auto II = importedFunctions .begin (), IE = importedFunctions .end (); II != IE; ++II )
921
+ for (auto II = M .begin (), IE = M .end (); II != IE; )
936
922
{
937
- Function* Func = II->F ;
923
+ Function* Func = &(*II);
924
+ ++II;
925
+ if (!Func || Func->isDeclaration ())
926
+ {
927
+ continue ;
928
+ }
938
929
939
- if (!Func->hasFnAttribute (llvm::Attribute::AlwaysInline))
930
+ if (!origFunctions. count (Func) && ! Func->hasFnAttribute (llvm::Attribute::AlwaysInline))
940
931
{
941
932
// Special handling of DP functions: any one that has not been marked as inline
942
933
// at this point, it will be either subroutine or stackcall.
943
- const bool isDPCallFunc = (isDPEmu () && II-> isSlowDPEmuFunc ());
934
+ const bool isDPCallFunc = (isDPEmu () && isSlowDPEmuFunc (Func ));
944
935
945
936
// Use subroutine/stackcall for some DP emulation functions if
946
937
// EmulationFunctionControl is set so, or
947
938
// use subroutines if total number of instructions added when
948
939
// all emulated functions are inlined exceed InlinedEmulationThreshold.
949
940
// If Func is a slow version of DP emu func, perf isn't important.
950
941
if (m_enableCallForEmulation &&
951
- (totalNumberOfPotentiallyInlinedInst > (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) ||
942
+ (totalNumberOfInlinedInst > (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) ||
952
943
isDPCallFunc))
953
944
{
954
945
Func->addFnAttr (llvm::Attribute::NoInline);
@@ -1013,128 +1004,6 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
1013
1004
return m_changed;
1014
1005
}
1015
1006
1016
- PreCompiledFuncImport::ImportedFunction::ImportedFunction (Function* F)
1017
- : F(F), type(EmuType::OTHER), funcInstructions(0 ), totalInstructions(0 )
1018
- {
1019
- // Count number of new instructions added by inlining.
1020
- for (BasicBlock& BB : F->getBasicBlockList ())
1021
- funcInstructions += BB.getInstList ().size ();
1022
-
1023
- updateUses ();
1024
-
1025
- // Get type of imported function.
1026
- StringRef name = F->getName ();
1027
-
1028
- if (name.equals (" __igcbuiltin_dp_div_nomadm_ieee" ) ||
1029
- name.equals (" __igcbuiltin_dp_div_nomadm_fast" ) ||
1030
- name.equals (" __igcbuiltin_dp_sqrt_nomadm_ieee" ) ||
1031
- name.equals (" __igcbuiltin_dp_sqrt_nomadm_fast" ))
1032
- {
1033
- type = EmuType::FASTDP;
1034
- }
1035
- else if (name.equals (" __igcbuiltin_dp_add" ) ||
1036
- name.equals (" __igcbuiltin_dp_sub" ) ||
1037
- name.equals (" __igcbuiltin_dp_fma" ) ||
1038
- name.equals (" __igcbuiltin_dp_mul" ) ||
1039
- name.equals (" __igcbuiltin_dp_div" ) ||
1040
- name.equals (" __igcbuiltin_dp_cmp" ) ||
1041
- name.equals (" __igcbuiltin_dp_to_int32" ) ||
1042
- name.equals (" __igcbuiltin_dp_to_uint32" ) ||
1043
- name.equals (" __igcbuiltin_int32_to_dp" ) ||
1044
- name.equals (" __igcbuiltin_uint32_to_dp" ) ||
1045
- name.equals (" __igcbuiltin_dp_to_sp" ) ||
1046
- name.equals (" __igcbuiltin_sp_to_dp" ) ||
1047
- name.equals (" __igcbuiltin_dp_sqrt" ))
1048
- {
1049
- // If true, it is a slow version of DP emu functions. Those functions
1050
- // are the original ones for just passing conformance, not for perf.
1051
- type = EmuType::SLOWDP;
1052
- }
1053
- else
1054
- {
1055
- for (int i = 0 ; i < NUM_FUNCTIONS && type == EmuType::OTHER; ++i)
1056
- {
1057
- for (int j = 0 ; j < NUM_TYPES && type == EmuType::OTHER; ++j)
1058
- {
1059
- if (name.equals (m_Int64SpDivRemFunctionNames[i][j]) ||
1060
- name.equals (m_Int64DpDivRemFunctionNames[i][j]))
1061
- {
1062
- type = EmuType::INT64;
1063
- }
1064
- }
1065
- }
1066
- }
1067
- }
1068
-
1069
- void PreCompiledFuncImport::ImportedFunction::updateUses ()
1070
- {
1071
- totalInstructions = funcInstructions * F->getNumUses ();
1072
- }
1073
-
1074
- PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::ImportedFunction::copy (ImportedFunction& other)
1075
- {
1076
- ValueToValueMapTy VM;
1077
- Function* copy = CloneFunction (other.F , VM);
1078
- return PreCompiledFuncImport::ImportedFunction (copy, other.type , other.funcInstructions , 0 );
1079
- }
1080
-
1081
- // Compare two imported functions in order preferred for inlining.
1082
- bool PreCompiledFuncImport::ImportedFunction::compare (ImportedFunction& L, ImportedFunction& R)
1083
- {
1084
- // First sort by preferred type of emulation.
1085
- if (L.type != R.type )
1086
- return L.type < R.type ;
1087
-
1088
- // Then sort by number of inlined instructions.
1089
- return L.totalInstructions < R.totalInstructions ;
1090
- };
1091
-
1092
- PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::createInlinedCopy (ImportedFunction& IF, unsigned n)
1093
- {
1094
- std::vector<CallInst*> toDelete;
1095
-
1096
- // Make copy that is always inlined.
1097
- ImportedFunction copy = ImportedFunction::copy (IF);
1098
- copy.F ->setName (IF.F ->getName () + " _always_inline" );
1099
- copy.F ->addFnAttr (llvm::Attribute::AlwaysInline);
1100
-
1101
- // Collect first n calls to replace with copy.
1102
- llvm::SmallVector<CallInst*, 8 > calls;
1103
- auto it = IF.F ->user_begin ();
1104
- for (unsigned i = 0 ; i < n; ++i)
1105
- {
1106
- CallInst* oldCall = dyn_cast<CallInst>(*(it++));
1107
- IGC_ASSERT (oldCall);
1108
- calls.push_back (oldCall);
1109
- }
1110
-
1111
- // Replace with always inlined copy.
1112
- for (CallInst* oldCall : calls)
1113
- {
1114
- std::vector<Value*> args;
1115
- for (unsigned arg = 0 ; arg < IGCLLVM::getNumArgOperands (oldCall); ++arg)
1116
- args.push_back (oldCall->getArgOperand (arg));
1117
-
1118
- // Create new call and insert it before old one
1119
- CallInst* newCall = CallInst::Create (copy.F , args, " " , oldCall);
1120
-
1121
- newCall->setCallingConv (copy.F ->getCallingConv ());
1122
- newCall->setAttributes (oldCall->getAttributes ());
1123
- newCall->setDebugLoc (oldCall->getDebugLoc ());
1124
-
1125
- oldCall->replaceAllUsesWith (newCall);
1126
- toDelete.push_back (oldCall);
1127
- }
1128
-
1129
- for (auto C : toDelete)
1130
- C->eraseFromParent ();
1131
-
1132
- copy.updateUses ();
1133
- IF.updateUses ();
1134
-
1135
- return copy;
1136
- }
1137
-
1138
1007
void PreCompiledFuncImport::visitBinaryOperator (BinaryOperator& I)
1139
1008
{
1140
1009
if (I.getOperand (0 )->getType ()->isIntOrIntVectorTy ())
@@ -2679,7 +2548,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
2679
2548
bool SPDiv = isSPDiv ();
2680
2549
bool DPEmu = isDPEmu ();
2681
2550
bool DPDivSqrtEmu = isDPDivSqrtEmu ();
2682
- bool I64DivRem = isI64DivRem ();
2683
2551
2684
2552
Module* M = m_pCtx->getModule ();
2685
2553
for (auto FI = M->begin (), FE = M->end (); FI != FE; ++FI)
@@ -2722,15 +2590,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
2722
2590
m_enableCallForEmulation = true ;
2723
2591
}
2724
2592
break ;
2725
- case Instruction::UDiv:
2726
- case Instruction::URem:
2727
- case Instruction::SDiv:
2728
- case Instruction::SRem:
2729
- if (I64DivRem && I->getOperand (0 )->getType ()->isIntegerTy (64 ))
2730
- {
2731
- m_enableCallForEmulation = true ;
2732
- }
2733
- break ;
2734
2593
}
2735
2594
2736
2595
GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
0 commit comments