@@ -17,7 +17,6 @@ SPDX-License-Identifier: MIT
17
17
#include " llvm/IR/InstIterator.h"
18
18
#include " llvm/Support/MemoryBuffer.h"
19
19
#include " llvm/Support/GenericDomTree.h"
20
- #include " llvm/Transforms/Utils/Cloning.h"
21
20
#include " llvm/Bitcode/BitcodeReader.h"
22
21
#include " llvm/Bitcode/BitcodeWriter.h"
23
22
#include " llvm/Linker/Linker.h"
@@ -633,16 +632,11 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
633
632
m_changed = false ;
634
633
635
634
// When we test it, we need to set emuKind
636
- if (IGC_GET_FLAG_VALUE (TestIGCPreCompiledFunctions) == 1 )
635
+ if (IGC_IS_FLAG_ENABLED (TestIGCPreCompiledFunctions))
637
636
{
638
637
m_emuKind = EmuKind::EMU_DP;
639
638
checkAndSetEnableSubroutine ();
640
639
}
641
- else if (IGC_GET_FLAG_VALUE (TestIGCPreCompiledFunctions) == 2 )
642
- {
643
- m_emuKind = EmuKind::EMU_DP_DIV_SQRT;
644
- checkAndSetEnableSubroutine ();
645
- }
646
640
// sanity check
647
641
if (m_emuKind == 0 ) {
648
642
// Nothing to emulate
@@ -832,11 +826,12 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
832
826
}
833
827
}
834
828
835
- llvm::SmallVector<ImportedFunction, 32 > importedFunctions;
836
- unsigned totalNumberOfInlinedInst = 0 , totalNumberOfPotentiallyInlinedInst = 0 ;
829
+ unsigned totalNumberOfInlinedInst = 0 ;
837
830
int emuFC = (int )IGC_GET_FLAG_VALUE (EmulationFunctionControl);
838
831
839
- // Post processing, set those imported functions as internal linkage.
832
+ // Post processing, set those imported functions as internal linkage
833
+ // and alwaysinline. Also count how many instructions would be added
834
+ // to the shader if inlining occurred.
840
835
for (auto II = M.begin (), IE = M.end (); II != IE; )
841
836
{
842
837
Function* Func = &(*II);
@@ -858,106 +853,92 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
858
853
continue ;
859
854
}
860
855
861
- if (std::find (importedFunctions.begin (), importedFunctions.end (), Func) == importedFunctions.end ())
862
- importedFunctions.push_back (Func);
863
- }
864
- else
865
- {
866
- // Make sure original func isn't inlined accidentally.
856
+ // Remove noinline/AlwaysInline attr if present.
857
+ Func->removeFnAttr (llvm::Attribute::NoInline);
867
858
Func->removeFnAttr (llvm::Attribute::AlwaysInline);
868
- }
869
- }
870
-
871
- // Sort imported instructions in preferred inlining order.
872
- std::sort (importedFunctions.begin (), importedFunctions.end (), ImportedFunction::compare);
873
859
874
- // Post processing, set those imported functions as alwaysinline.
875
- // Also count how many instructions would be added to the shader
876
- // if inlining occurred.
877
- for (auto II = importedFunctions.begin (), IE = importedFunctions.end (); II != IE; ++II)
878
- {
879
- Function* Func = II->F ;
860
+ if (m_enableCallForEmulation &&
861
+ emuFC != FLAG_FCALL_DEFAULT &&
862
+ emuFC != FLAG_FCALL_FORCE_INLINE)
863
+ {
864
+ // Disable inlining completely.
865
+ continue ;
866
+ }
880
867
881
- // Remove noinline/AlwaysInline attr if present.
882
- Func->removeFnAttr (llvm::Attribute::NoInline);
883
- Func->removeFnAttr (llvm::Attribute::AlwaysInline);
868
+ if (Func->hasOneUse () || emuFC == FLAG_FCALL_FORCE_INLINE)
869
+ {
870
+ Func->addFnAttr (llvm::Attribute::AlwaysInline);
871
+ continue ;
872
+ }
884
873
885
- if (m_enableCallForEmulation &&
886
- emuFC != FLAG_FCALL_DEFAULT &&
887
- emuFC != FLAG_FCALL_FORCE_INLINE)
888
- {
889
- // Disable inlining completely.
890
- continue ;
891
- }
874
+ // Count number of instructions in the function
875
+ unsigned NumInst = 0 ;
876
+ for (BasicBlock& BB : Func->getBasicBlockList ()) {
877
+ NumInst += BB.getInstList ().size ();
878
+ }
892
879
893
- if (Func-> hasOneUse () || emuFC == FLAG_FCALL_FORCE_INLINE)
894
- {
895
- Func-> addFnAttr (llvm::Attribute::AlwaysInline);
896
- continue ;
897
- }
880
+ // Don't want to subroutine small functions
881
+ if (NumInst <= 5 )
882
+ {
883
+ // Add AlwaysInline attribute to force inlining all calls.
884
+ Func-> addFnAttr (llvm::Attribute::AlwaysInline);
898
885
899
- // Don't want to subroutine small functions
900
- if (II->funcInstructions <= 5 )
901
- {
902
- // Add AlwaysInline attribute to force inlining all calls.
903
- Func->addFnAttr (llvm::Attribute::AlwaysInline);
886
+ continue ;
887
+ }
904
888
905
- continue ;
889
+ totalNumberOfInlinedInst += NumInst * Func-> getNumUses () ;
906
890
}
907
-
908
- // Don't inline original slow DP emu functions, they are only for passing
909
- // conformance, not for perf.
910
- if (isDPEmu () && II->isSlowDPEmuFunc ())
911
- continue ;
912
-
913
- totalNumberOfPotentiallyInlinedInst += II->totalInstructions ;
914
-
915
- // If function fits in threshold, always inline.
916
- if (totalNumberOfInlinedInst + II->totalInstructions <= (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold))
891
+ else
917
892
{
918
- totalNumberOfInlinedInst += II-> totalInstructions ;
919
- Func->addFnAttr (llvm::Attribute::AlwaysInline);
893
+ // Make sure original func isn't inlined accidentally.
894
+ Func->removeFnAttr (llvm::Attribute::AlwaysInline);
920
895
}
921
896
}
922
897
923
- // Check if more functions can fit in threshold if they would be split into inline/noinline copies.
924
- if (m_enableCallForEmulation && emuFC == FLAG_FCALL_DEFAULT && totalNumberOfInlinedInst < ( unsigned ) IGC_GET_FLAG_VALUE (InlinedEmulationThreshold))
925
- {
926
- for ( auto II = importedFunctions. begin (); II != importedFunctions. end (); ++II)
927
- {
928
- Function* Func = II-> F ;
929
-
930
- if (Func-> hasFnAttribute (llvm::Attribute::AlwaysInline ) || ( isDPEmu () && II-> isSlowDPEmuFunc ()))
931
- continue ;
932
-
933
- unsigned calls = (( unsigned ) IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) - totalNumberOfInlinedInst) / II-> funcInstructions ;
934
- if (calls > 0 )
935
- {
936
- // Split function into inline/no-inline copies.
937
- ImportedFunction copy = createInlinedCopy (*II, calls);
938
- importedFunctions. push_back (copy);
939
- totalNumberOfInlinedInst += copy. totalInstructions ;
940
- }
898
+ // If true, it is a slow version of DP emu functions. Those functions
899
+ // are the original ones for just passing conformance, not for perf.
900
+ auto isSlowDPEmuFunc = [](Function* F) {
901
+ StringRef FN = F-> getName ();
902
+ if (FN. equals ( " __igcbuiltin_dp_add " ) ||
903
+ FN. equals ( " __igcbuiltin_dp_sub " ) ||
904
+ FN. equals ( " __igcbuiltin_dp_fma " ) ||
905
+ FN. equals ( " __igcbuiltin_dp_mul " ) ||
906
+ FN. equals ( " __igcbuiltin_dp_div " ) ||
907
+ FN. equals ( " __igcbuiltin_dp_cmp " ) ||
908
+ FN. equals ( " __igcbuiltin_dp_to_int32 " ) ||
909
+ FN. equals ( " __igcbuiltin_dp_to_uint32 " ) ||
910
+ FN. equals ( " __igcbuiltin_int32_to_dp " ) ||
911
+ FN. equals ( " __igcbuiltin_uint32_to_dp " ) ||
912
+ FN. equals ( " __igcbuiltin_dp_to_sp " ) ||
913
+ FN. equals ( " __igcbuiltin_sp_to_dp " ) ||
914
+ FN. equals ( " __igcbuiltin_dp_sqrt " )) {
915
+ return true ;
941
916
}
942
- }
917
+ return false ;
918
+ };
943
919
944
- for (auto II = importedFunctions .begin (), IE = importedFunctions .end (); II != IE; ++II )
920
+ for (auto II = M .begin (), IE = M .end (); II != IE; )
945
921
{
946
- Function* Func = II->F ;
922
+ Function* Func = &(*II);
923
+ ++II;
924
+ if (!Func || Func->isDeclaration ())
925
+ {
926
+ continue ;
927
+ }
947
928
948
- if (!Func->hasFnAttribute (llvm::Attribute::AlwaysInline))
929
+ if (!origFunctions. count (Func) && ! Func->hasFnAttribute (llvm::Attribute::AlwaysInline))
949
930
{
950
931
// Special handling of DP functions: any one that has not been marked as inline
951
932
// at this point, it will be either subroutine or stackcall.
952
- const bool isDPCallFunc = (isDPEmu () && II-> isSlowDPEmuFunc ());
933
+ const bool isDPCallFunc = (isDPEmu () && isSlowDPEmuFunc (Func ));
953
934
954
935
// Use subroutine/stackcall for some DP emulation functions if
955
936
// EmulationFunctionControl is set so, or
956
937
// use subroutines if total number of instructions added when
957
938
// all emulated functions are inlined exceed InlinedEmulationThreshold.
958
939
// If Func is a slow version of DP emu func, perf isn't important.
959
940
if (m_enableCallForEmulation &&
960
- (totalNumberOfPotentiallyInlinedInst > (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) ||
941
+ (totalNumberOfInlinedInst > (unsigned )IGC_GET_FLAG_VALUE (InlinedEmulationThreshold) ||
961
942
isDPCallFunc))
962
943
{
963
944
Func->addFnAttr (llvm::Attribute::NoInline);
@@ -1022,128 +1003,6 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
1022
1003
return m_changed;
1023
1004
}
1024
1005
1025
- PreCompiledFuncImport::ImportedFunction::ImportedFunction (Function* F)
1026
- : F(F), type(EmuType::OTHER), funcInstructions(0 ), totalInstructions(0 )
1027
- {
1028
- // Count number of new instructions added by inlining.
1029
- for (BasicBlock& BB : F->getBasicBlockList ())
1030
- funcInstructions += BB.getInstList ().size ();
1031
-
1032
- updateUses ();
1033
-
1034
- // Get type of imported function.
1035
- StringRef name = F->getName ();
1036
-
1037
- if (name.equals (" __igcbuiltin_dp_div_nomadm_ieee" ) ||
1038
- name.equals (" __igcbuiltin_dp_div_nomadm_fast" ) ||
1039
- name.equals (" __igcbuiltin_dp_sqrt_nomadm_ieee" ) ||
1040
- name.equals (" __igcbuiltin_dp_sqrt_nomadm_fast" ))
1041
- {
1042
- type = EmuType::FASTDP;
1043
- }
1044
- else if (name.equals (" __igcbuiltin_dp_add" ) ||
1045
- name.equals (" __igcbuiltin_dp_sub" ) ||
1046
- name.equals (" __igcbuiltin_dp_fma" ) ||
1047
- name.equals (" __igcbuiltin_dp_mul" ) ||
1048
- name.equals (" __igcbuiltin_dp_div" ) ||
1049
- name.equals (" __igcbuiltin_dp_cmp" ) ||
1050
- name.equals (" __igcbuiltin_dp_to_int32" ) ||
1051
- name.equals (" __igcbuiltin_dp_to_uint32" ) ||
1052
- name.equals (" __igcbuiltin_int32_to_dp" ) ||
1053
- name.equals (" __igcbuiltin_uint32_to_dp" ) ||
1054
- name.equals (" __igcbuiltin_dp_to_sp" ) ||
1055
- name.equals (" __igcbuiltin_sp_to_dp" ) ||
1056
- name.equals (" __igcbuiltin_dp_sqrt" ))
1057
- {
1058
- // If true, it is a slow version of DP emu functions. Those functions
1059
- // are the original ones for just passing conformance, not for perf.
1060
- type = EmuType::SLOWDP;
1061
- }
1062
- else
1063
- {
1064
- for (int i = 0 ; i < NUM_FUNCTIONS && type == EmuType::OTHER; ++i)
1065
- {
1066
- for (int j = 0 ; j < NUM_TYPES && type == EmuType::OTHER; ++j)
1067
- {
1068
- if (name.equals (m_Int64SpDivRemFunctionNames[i][j]) ||
1069
- name.equals (m_Int64DpDivRemFunctionNames[i][j]))
1070
- {
1071
- type = EmuType::INT64;
1072
- }
1073
- }
1074
- }
1075
- }
1076
- }
1077
-
1078
- void PreCompiledFuncImport::ImportedFunction::updateUses ()
1079
- {
1080
- totalInstructions = funcInstructions * F->getNumUses ();
1081
- }
1082
-
1083
- PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::ImportedFunction::copy (ImportedFunction& other)
1084
- {
1085
- ValueToValueMapTy VM;
1086
- Function* copy = CloneFunction (other.F , VM);
1087
- return PreCompiledFuncImport::ImportedFunction (copy, other.type , other.funcInstructions , 0 );
1088
- }
1089
-
1090
- // Compare two imported functions in order preferred for inlining.
1091
- bool PreCompiledFuncImport::ImportedFunction::compare (ImportedFunction& L, ImportedFunction& R)
1092
- {
1093
- // First sort by preferred type of emulation.
1094
- if (L.type != R.type )
1095
- return L.type < R.type ;
1096
-
1097
- // Then sort by number of inlined instructions.
1098
- return L.totalInstructions < R.totalInstructions ;
1099
- };
1100
-
1101
- PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::createInlinedCopy (ImportedFunction& IF, unsigned n)
1102
- {
1103
- std::vector<CallInst*> toDelete;
1104
-
1105
- // Make copy that is always inlined.
1106
- ImportedFunction copy = ImportedFunction::copy (IF);
1107
- copy.F ->setName (IF.F ->getName () + " _always_inline" );
1108
- copy.F ->addFnAttr (llvm::Attribute::AlwaysInline);
1109
-
1110
- // Collect first n calls to replace with copy.
1111
- llvm::SmallVector<CallInst*, 8 > calls;
1112
- auto it = IF.F ->user_begin ();
1113
- for (unsigned i = 0 ; i < n; ++i)
1114
- {
1115
- CallInst* oldCall = dyn_cast<CallInst>(*(it++));
1116
- IGC_ASSERT (oldCall);
1117
- calls.push_back (oldCall);
1118
- }
1119
-
1120
- // Replace with always inlined copy.
1121
- for (CallInst* oldCall : calls)
1122
- {
1123
- std::vector<Value*> args;
1124
- for (unsigned arg = 0 ; arg < IGCLLVM::getNumArgOperands (oldCall); ++arg)
1125
- args.push_back (oldCall->getArgOperand (arg));
1126
-
1127
- // Create new call and insert it before old one
1128
- CallInst* newCall = CallInst::Create (copy.F , args, " " , oldCall);
1129
-
1130
- newCall->setCallingConv (copy.F ->getCallingConv ());
1131
- newCall->setAttributes (oldCall->getAttributes ());
1132
- newCall->setDebugLoc (oldCall->getDebugLoc ());
1133
-
1134
- oldCall->replaceAllUsesWith (newCall);
1135
- toDelete.push_back (oldCall);
1136
- }
1137
-
1138
- for (auto C : toDelete)
1139
- C->eraseFromParent ();
1140
-
1141
- copy.updateUses ();
1142
- IF.updateUses ();
1143
-
1144
- return copy;
1145
- }
1146
-
1147
1006
void PreCompiledFuncImport::visitBinaryOperator (BinaryOperator& I)
1148
1007
{
1149
1008
if (I.getOperand (0 )->getType ()->isIntOrIntVectorTy ())
@@ -2688,7 +2547,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
2688
2547
bool SPDiv = isSPDiv ();
2689
2548
bool DPEmu = isDPEmu ();
2690
2549
bool DPDivSqrtEmu = isDPDivSqrtEmu ();
2691
- bool I64DivRem = isI64DivRem ();
2692
2550
2693
2551
Module* M = m_pCtx->getModule ();
2694
2552
for (auto FI = M->begin (), FE = M->end (); FI != FE; ++FI)
@@ -2731,15 +2589,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
2731
2589
m_enableCallForEmulation = true ;
2732
2590
}
2733
2591
break ;
2734
- case Instruction::UDiv:
2735
- case Instruction::URem:
2736
- case Instruction::SDiv:
2737
- case Instruction::SRem:
2738
- if (I64DivRem && I->getOperand (0 )->getType ()->isIntegerTy (64 ))
2739
- {
2740
- m_enableCallForEmulation = true ;
2741
- }
2742
- break ;
2743
2592
}
2744
2593
2745
2594
GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
0 commit comments