Skip to content

Commit 8afa67f

Browse files
pkwasnie-intel authored and pszymich committed
Multi-revert of auto-GRF related patches
Revert `igc-1.0.15468.18` "limit max GRF if kernel is SIMD16". This reverts commit 04e24cd.
Revert `igc-1.0.15468.17` "Add option to set maximum GRF number that vISA can select". This reverts commit 555731c.
Revert "precompiled emulation inlining improvements". This reverts commit a239bf5.
1 parent 04e24cd commit 8afa67f

File tree

8 files changed

+76
-357
lines changed

8 files changed

+76
-357
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4603,13 +4603,6 @@ namespace IGC
46034603
SaveOption(vISA_AutoGRFSelection, true);
46044604
}
46054605

4606-
if (m_program->m_Platform->supportsAutoGRFSelection() &&
4607-
m_program->m_Platform->getPlatformInfo().eProductFamily == IGFX_PVC &&
4608-
m_program->m_dispatchSize == SIMDMode::SIMD16)
4609-
{
4610-
SaveOption(vISA_MaxGRFNum, unsigned(128));
4611-
}
4612-
46134606
// Emit warnings if mismatch is found in user input
46144607
// Mismatch between number of threads and GRF size (per module)
46154608
if (ClContext->getNumThreadsPerEU() > 0 && ClContext->getExpGRFSize() > 0 &&

IGC/Compiler/Optimizer/PreCompiledFuncImport.cpp

Lines changed: 65 additions & 206 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ SPDX-License-Identifier: MIT
1717
#include "llvm/IR/InstIterator.h"
1818
#include "llvm/Support/MemoryBuffer.h"
1919
#include "llvm/Support/GenericDomTree.h"
20-
#include "llvm/Transforms/Utils/Cloning.h"
2120
#include "llvm/Bitcode/BitcodeReader.h"
2221
#include "llvm/Bitcode/BitcodeWriter.h"
2322
#include "llvm/Linker/Linker.h"
@@ -633,7 +632,7 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
633632
m_changed = false;
634633

635634
// When we test it, we need to set emuKind
636-
if (IGC_GET_FLAG_VALUE(TestIGCPreCompiledFunctions) == 1)
635+
if (IGC_IS_FLAG_ENABLED(TestIGCPreCompiledFunctions))
637636
{
638637
m_emuKind = IGC_GET_FLAG_VALUE(ForceEmuKind) ? IGC_GET_FLAG_VALUE(ForceEmuKind) : EmuKind::EMU_DP;
639638
checkAndSetEnableSubroutine();
@@ -828,11 +827,12 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
828827
}
829828
}
830829

831-
llvm::SmallVector<ImportedFunction, 32> importedFunctions;
832-
unsigned totalNumberOfInlinedInst = 0, totalNumberOfPotentiallyInlinedInst = 0;
830+
unsigned totalNumberOfInlinedInst = 0;
833831
int emuFC = (int)IGC_GET_FLAG_VALUE(EmulationFunctionControl);
834832

835-
// Post processing, set those imported functions as internal linkage.
833+
// Post processing, set those imported functions as internal linkage
834+
// and alwaysinline. Also count how many instructions would be added
835+
// to the shader if inlining occurred.
836836
for (auto II = M.begin(), IE = M.end(); II != IE; )
837837
{
838838
Function* Func = &(*II);
@@ -854,101 +854,92 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
854854
continue;
855855
}
856856

857-
if (std::find(importedFunctions.begin(), importedFunctions.end(), Func) == importedFunctions.end())
858-
importedFunctions.push_back(Func);
859-
}
860-
else
861-
{
862-
// Make sure original func isn't inlined accidentally.
857+
// Remove noinline/AlwaysInline attr if present.
858+
Func->removeFnAttr(llvm::Attribute::NoInline);
863859
Func->removeFnAttr(llvm::Attribute::AlwaysInline);
864-
}
865-
}
866-
867-
// Sort imported instructions in preferred inlining order.
868-
std::sort(importedFunctions.begin(), importedFunctions.end(), ImportedFunction::compare);
869860

870-
// Post processing, set those imported functions as alwaysinline.
871-
// Also count how many instructions would be added to the shader
872-
// if inlining occurred.
873-
for (auto II = importedFunctions.begin(), IE = importedFunctions.end(); II != IE; ++II)
874-
{
875-
Function* Func = II->F;
861+
if (m_enableCallForEmulation &&
862+
emuFC != FLAG_FCALL_DEFAULT &&
863+
emuFC != FLAG_FCALL_FORCE_INLINE)
864+
{
865+
// Disable inlining completely.
866+
continue;
867+
}
876868

877-
// Remove noinline/AlwaysInline attr if present.
878-
Func->removeFnAttr(llvm::Attribute::NoInline);
879-
Func->removeFnAttr(llvm::Attribute::AlwaysInline);
869+
if (Func->hasOneUse() || emuFC == FLAG_FCALL_FORCE_INLINE)
870+
{
871+
Func->addFnAttr(llvm::Attribute::AlwaysInline);
872+
continue;
873+
}
880874

881-
if (m_enableCallForEmulation &&
882-
emuFC != FLAG_FCALL_DEFAULT &&
883-
emuFC != FLAG_FCALL_FORCE_INLINE)
884-
{
885-
// Disable inlining completely.
886-
continue;
887-
}
875+
// Count number of instructions in the function
876+
unsigned NumInst = 0;
877+
for (BasicBlock& BB : Func->getBasicBlockList()) {
878+
NumInst += BB.getInstList().size();
879+
}
888880

889-
if (Func->hasOneUse() || emuFC == FLAG_FCALL_FORCE_INLINE)
890-
{
891-
Func->addFnAttr(llvm::Attribute::AlwaysInline);
892-
continue;
893-
}
881+
// Don't want to subroutine small functions
882+
if (NumInst <= 5)
883+
{
884+
// Add AlwaysInline attribute to force inlining all calls.
885+
Func->addFnAttr(llvm::Attribute::AlwaysInline);
894886

895-
// Don't want to subroutine small functions
896-
if (II->funcInstructions <= 5)
897-
{
898-
// Add AlwaysInline attribute to force inlining all calls.
899-
Func->addFnAttr(llvm::Attribute::AlwaysInline);
887+
continue;
888+
}
900889

901-
continue;
890+
totalNumberOfInlinedInst += NumInst * Func->getNumUses();
902891
}
903-
904-
totalNumberOfPotentiallyInlinedInst += II->totalInstructions;
905-
906-
// If function fits in threshold, always inline.
907-
if (totalNumberOfInlinedInst + II->totalInstructions <= (unsigned)IGC_GET_FLAG_VALUE(InlinedEmulationThreshold))
892+
else
908893
{
909-
totalNumberOfInlinedInst += II->totalInstructions;
910-
Func->addFnAttr(llvm::Attribute::AlwaysInline);
894+
// Make sure original func isn't inlined accidentally.
895+
Func->removeFnAttr(llvm::Attribute::AlwaysInline);
911896
}
912897
}
913898

914-
// Check if more functions can fit in threshold if they would be split into inline/noinline copies.
915-
if (m_enableCallForEmulation && emuFC == FLAG_FCALL_DEFAULT && totalNumberOfInlinedInst < (unsigned)IGC_GET_FLAG_VALUE(InlinedEmulationThreshold))
916-
{
917-
for (auto II = importedFunctions.begin(); II != importedFunctions.end(); ++II)
918-
{
919-
Function* Func = II->F;
920-
921-
if (Func->hasFnAttribute(llvm::Attribute::AlwaysInline))
922-
continue;
923-
924-
unsigned calls = ((unsigned)IGC_GET_FLAG_VALUE(InlinedEmulationThreshold) - totalNumberOfInlinedInst) / II->funcInstructions;
925-
if (calls > 0)
926-
{
927-
// Split function into inline/no-inline copies.
928-
ImportedFunction copy = createInlinedCopy(*II, calls);
929-
importedFunctions.push_back(copy);
930-
totalNumberOfInlinedInst += copy.totalInstructions;
931-
}
899+
// If true, it is a slow version of DP emu functions. Those functions
900+
// are the original ones for just passing conformance, not for perf.
901+
auto isSlowDPEmuFunc = [](Function* F) {
902+
StringRef FN = F->getName();
903+
if (FN.equals("__igcbuiltin_dp_add") ||
904+
FN.equals("__igcbuiltin_dp_sub") ||
905+
FN.equals("__igcbuiltin_dp_fma") ||
906+
FN.equals("__igcbuiltin_dp_mul") ||
907+
FN.equals("__igcbuiltin_dp_div") ||
908+
FN.equals("__igcbuiltin_dp_cmp") ||
909+
FN.equals("__igcbuiltin_dp_to_int32") ||
910+
FN.equals("__igcbuiltin_dp_to_uint32") ||
911+
FN.equals("__igcbuiltin_int32_to_dp") ||
912+
FN.equals("__igcbuiltin_uint32_to_dp") ||
913+
FN.equals("__igcbuiltin_dp_to_sp") ||
914+
FN.equals("__igcbuiltin_sp_to_dp") ||
915+
FN.equals("__igcbuiltin_dp_sqrt")) {
916+
return true;
932917
}
933-
}
918+
return false;
919+
};
934920

935-
for (auto II = importedFunctions.begin(), IE = importedFunctions.end(); II != IE; ++II)
921+
for (auto II = M.begin(), IE = M.end(); II != IE; )
936922
{
937-
Function* Func = II->F;
923+
Function* Func = &(*II);
924+
++II;
925+
if (!Func || Func->isDeclaration())
926+
{
927+
continue;
928+
}
938929

939-
if (!Func->hasFnAttribute(llvm::Attribute::AlwaysInline))
930+
if (!origFunctions.count(Func) && !Func->hasFnAttribute(llvm::Attribute::AlwaysInline))
940931
{
941932
// Special handling of DP functions: any one that has not been marked as inline
942933
// at this point, it will be either subroutine or stackcall.
943-
const bool isDPCallFunc = (isDPEmu() && II->isSlowDPEmuFunc());
934+
const bool isDPCallFunc = (isDPEmu() && isSlowDPEmuFunc(Func));
944935

945936
// Use subroutine/stackcall for some DP emulation functions if
946937
// EmulationFunctionControl is set so, or
947938
// use subroutines if total number of instructions added when
948939
// all emulated functions are inlined exceed InlinedEmulationThreshold.
949940
// If Func is a slow version of DP emu func, perf isn't important.
950941
if (m_enableCallForEmulation &&
951-
(totalNumberOfPotentiallyInlinedInst > (unsigned)IGC_GET_FLAG_VALUE(InlinedEmulationThreshold) ||
942+
(totalNumberOfInlinedInst > (unsigned)IGC_GET_FLAG_VALUE(InlinedEmulationThreshold) ||
952943
isDPCallFunc))
953944
{
954945
Func->addFnAttr(llvm::Attribute::NoInline);
@@ -1013,128 +1004,6 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
10131004
return m_changed;
10141005
}
10151006

1016-
PreCompiledFuncImport::ImportedFunction::ImportedFunction(Function* F)
1017-
: F(F), type(EmuType::OTHER), funcInstructions(0), totalInstructions(0)
1018-
{
1019-
// Count number of new instructions added by inlining.
1020-
for (BasicBlock& BB : F->getBasicBlockList())
1021-
funcInstructions += BB.getInstList().size();
1022-
1023-
updateUses();
1024-
1025-
// Get type of imported function.
1026-
StringRef name = F->getName();
1027-
1028-
if (name.equals("__igcbuiltin_dp_div_nomadm_ieee") ||
1029-
name.equals("__igcbuiltin_dp_div_nomadm_fast") ||
1030-
name.equals("__igcbuiltin_dp_sqrt_nomadm_ieee") ||
1031-
name.equals("__igcbuiltin_dp_sqrt_nomadm_fast"))
1032-
{
1033-
type = EmuType::FASTDP;
1034-
}
1035-
else if (name.equals("__igcbuiltin_dp_add") ||
1036-
name.equals("__igcbuiltin_dp_sub") ||
1037-
name.equals("__igcbuiltin_dp_fma") ||
1038-
name.equals("__igcbuiltin_dp_mul") ||
1039-
name.equals("__igcbuiltin_dp_div") ||
1040-
name.equals("__igcbuiltin_dp_cmp") ||
1041-
name.equals("__igcbuiltin_dp_to_int32") ||
1042-
name.equals("__igcbuiltin_dp_to_uint32") ||
1043-
name.equals("__igcbuiltin_int32_to_dp") ||
1044-
name.equals("__igcbuiltin_uint32_to_dp") ||
1045-
name.equals("__igcbuiltin_dp_to_sp") ||
1046-
name.equals("__igcbuiltin_sp_to_dp") ||
1047-
name.equals("__igcbuiltin_dp_sqrt"))
1048-
{
1049-
// If true, it is a slow version of DP emu functions. Those functions
1050-
// are the original ones for just passing conformance, not for perf.
1051-
type = EmuType::SLOWDP;
1052-
}
1053-
else
1054-
{
1055-
for (int i = 0; i < NUM_FUNCTIONS && type == EmuType::OTHER; ++i)
1056-
{
1057-
for (int j = 0; j < NUM_TYPES && type == EmuType::OTHER; ++j)
1058-
{
1059-
if (name.equals(m_Int64SpDivRemFunctionNames[i][j]) ||
1060-
name.equals(m_Int64DpDivRemFunctionNames[i][j]))
1061-
{
1062-
type = EmuType::INT64;
1063-
}
1064-
}
1065-
}
1066-
}
1067-
}
1068-
1069-
void PreCompiledFuncImport::ImportedFunction::updateUses()
1070-
{
1071-
totalInstructions = funcInstructions * F->getNumUses();
1072-
}
1073-
1074-
PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::ImportedFunction::copy(ImportedFunction& other)
1075-
{
1076-
ValueToValueMapTy VM;
1077-
Function* copy = CloneFunction(other.F, VM);
1078-
return PreCompiledFuncImport::ImportedFunction(copy, other.type, other.funcInstructions, 0);
1079-
}
1080-
1081-
// Compare two imported functions in order preferred for inlining.
1082-
bool PreCompiledFuncImport::ImportedFunction::compare(ImportedFunction& L, ImportedFunction& R)
1083-
{
1084-
// First sort by preferred type of emulation.
1085-
if (L.type != R.type)
1086-
return L.type < R.type;
1087-
1088-
// Then sort by number of inlined instructions.
1089-
return L.totalInstructions < R.totalInstructions;
1090-
};
1091-
1092-
PreCompiledFuncImport::ImportedFunction PreCompiledFuncImport::createInlinedCopy(ImportedFunction& IF, unsigned n)
1093-
{
1094-
std::vector<CallInst*> toDelete;
1095-
1096-
// Make copy that is always inlined.
1097-
ImportedFunction copy = ImportedFunction::copy(IF);
1098-
copy.F->setName(IF.F->getName() + "_always_inline");
1099-
copy.F->addFnAttr(llvm::Attribute::AlwaysInline);
1100-
1101-
// Collect first n calls to replace with copy.
1102-
llvm::SmallVector<CallInst*, 8> calls;
1103-
auto it = IF.F->user_begin();
1104-
for (unsigned i = 0; i < n; ++i)
1105-
{
1106-
CallInst* oldCall = dyn_cast<CallInst>(*(it++));
1107-
IGC_ASSERT(oldCall);
1108-
calls.push_back(oldCall);
1109-
}
1110-
1111-
// Replace with always inlined copy.
1112-
for (CallInst* oldCall : calls)
1113-
{
1114-
std::vector<Value*> args;
1115-
for (unsigned arg = 0; arg < IGCLLVM::getNumArgOperands(oldCall); ++arg)
1116-
args.push_back(oldCall->getArgOperand(arg));
1117-
1118-
// Create new call and insert it before old one
1119-
CallInst* newCall = CallInst::Create(copy.F, args, "", oldCall);
1120-
1121-
newCall->setCallingConv(copy.F->getCallingConv());
1122-
newCall->setAttributes(oldCall->getAttributes());
1123-
newCall->setDebugLoc(oldCall->getDebugLoc());
1124-
1125-
oldCall->replaceAllUsesWith(newCall);
1126-
toDelete.push_back(oldCall);
1127-
}
1128-
1129-
for (auto C : toDelete)
1130-
C->eraseFromParent();
1131-
1132-
copy.updateUses();
1133-
IF.updateUses();
1134-
1135-
return copy;
1136-
}
1137-
11381007
void PreCompiledFuncImport::visitBinaryOperator(BinaryOperator& I)
11391008
{
11401009
if (I.getOperand(0)->getType()->isIntOrIntVectorTy())
@@ -2679,7 +2548,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
26792548
bool SPDiv = isSPDiv();
26802549
bool DPEmu = isDPEmu();
26812550
bool DPDivSqrtEmu = isDPDivSqrtEmu();
2682-
bool I64DivRem = isI64DivRem();
26832551

26842552
Module* M = m_pCtx->getModule();
26852553
for (auto FI = M->begin(), FE = M->end(); FI != FE; ++FI)
@@ -2722,15 +2590,6 @@ void PreCompiledFuncImport::checkAndSetEnableSubroutine()
27222590
m_enableCallForEmulation = true;
27232591
}
27242592
break;
2725-
case Instruction::UDiv:
2726-
case Instruction::URem:
2727-
case Instruction::SDiv:
2728-
case Instruction::SRem:
2729-
if (I64DivRem && I->getOperand(0)->getType()->isIntegerTy(64))
2730-
{
2731-
m_enableCallForEmulation = true;
2732-
}
2733-
break;
27342593
}
27352594

27362595
GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);

0 commit comments

Comments
 (0)