Skip to content

Commit 9346e39

Browse files
admitric authored and igcbot committed
Minor CodeLoopSinking refactoring
Minor CodeLoopSinking refactoring
1 parent 8d83ede commit 9346e39

File tree

10 files changed

+117
-129
lines changed

10 files changed

+117
-129
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

Lines changed: 106 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ namespace IGC {
815815
// Helper functions for loop sink debug dumps
816816
#define PrintDump(Contents) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {LogStream << Contents;}
817817
#define PrintInstructionDump(Inst) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {Inst->print(LogStream, false); LogStream << "\n";}
818-
#define PrintOUGDump(OUG) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG->print(LogStream); LogStream << "\n";}
818+
#define PrintOUGDump(OUG) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG.print(LogStream); LogStream << "\n";}
819819

820820

821821
// Register pass to igc-opt
@@ -851,7 +851,7 @@ namespace IGC {
851851
return false;
852852

853853
if (IGC_IS_FLAG_ENABLED(DisableCodeSinking) ||
854-
numInsts(F) < IGC_GET_FLAG_VALUE(CodeSinkingMinSize))
854+
numInsts(F) < IGC_GET_FLAG_VALUE(CodeLoopSinkingMinSize))
855855
{
856856
return false;
857857
}
@@ -900,31 +900,30 @@ namespace IGC {
900900
if (IGC_IS_FLAG_ENABLED(DumpLoopSink))
901901
{
902902
if (IGC_IS_FLAG_ENABLED(PrintToConsole))
903-
{
904903
IGC::Debug::ods() << Log;
905-
}
906904
else
907-
{
908-
auto Name = Debug::DumpName(IGC::Debug::GetShaderOutputName())
909-
.Hash(CTX->hash)
910-
.Type(CTX->type)
911-
.Retry(CTX->m_retryManager.GetRetryId())
912-
.Pass("loopsink")
913-
.Extension("txt");
914-
IGC::Debug::DumpLock();
915-
std::ofstream OutputFile(Name.str(), std::ios_base::app);
916-
if (OutputFile.is_open())
917-
{
918-
OutputFile << Log;
919-
}
920-
OutputFile.close();
921-
IGC::Debug::DumpUnlock();
922-
}
905+
dumpToFile(Log);
923906
}
924907

925908
return Changed;
926909
}
927910

911+
void CodeLoopSinking::dumpToFile(const std::string& Log)
912+
{
913+
auto Name = Debug::DumpName(IGC::Debug::GetShaderOutputName())
914+
.Hash(CTX->hash)
915+
.Type(CTX->type)
916+
.Retry(CTX->m_retryManager.GetRetryId())
917+
.Pass("loopsink")
918+
.Extension("txt");
919+
IGC::Debug::DumpLock();
920+
std::ofstream OutputFile(Name.str(), std::ios_base::app);
921+
if (OutputFile.is_open())
922+
OutputFile << Log;
923+
OutputFile.close();
924+
IGC::Debug::DumpUnlock();
925+
}
926+
928927
// Implementation of RPE->getMaxRegCountForLoop(*L, SIMD);
929928
// with per-BB pressure caching to improve compile-time
930929
uint CodeLoopSinking::getMaxRegCountForLoop(Loop *L)
@@ -974,6 +973,22 @@ namespace IGC {
974973
uint NGRF = CTX->getNumGRFPerThread();
975974
uint SIMD = numLanes(RPE->bestGuessSIMDSize());
976975

976+
PrintDump("\n");
977+
if (!Preheader->getName().empty())
978+
{
979+
PrintDump("Checking loop with preheader " << Preheader->getName() << ": \n");
980+
}
981+
else if (!Preheader->empty())
982+
{
983+
PrintDump("Checking loop with unnamed preheader. First preheader instruction:\n");
984+
Instruction* First = &Preheader->front();
985+
PrintInstructionDump(First);
986+
}
987+
else
988+
{
989+
PrintDump("Checking loop with unnamed empty preheader.");
990+
}
991+
977992
// Estimate preheader's potential to sink
978993
ValueSet PreheaderDefs = RPE->getDefs(*Preheader);
979994
// Filter out preheader defined values that are used not in the loop or not supported
@@ -987,6 +1002,13 @@ namespace IGC {
9871002
PreheaderDefsCandidates.insert(V);
9881003
}
9891004
}
1005+
1006+
if (PreheaderDefsCandidates.empty())
1007+
{
1008+
PrintDump(">> No sinking candidates in the preheader.\n");
1009+
return LoopSinkMode::NoSink;
1010+
}
1011+
9901012
uint PreheaderDefsSizeInBytes = RPE->estimateSizeInBytes(PreheaderDefsCandidates, *F, SIMD, &WI);
9911013
uint PreheaderDefsSizeInRegs = RPE->bytesToRegisters(PreheaderDefsSizeInBytes);
9921014

@@ -1002,22 +1024,6 @@ namespace IGC {
10021024
(PreheaderDefsSizeInRegs > (MaxLoopPressure - NGRF) * LOOPSINK_PREHEADER_IMPACT_THRESHOLD));
10031025
};
10041026

1005-
PrintDump("\n");
1006-
if (!Preheader->getName().empty())
1007-
{
1008-
PrintDump("Checking loop with preheader " << Preheader->getName() << ": \n");
1009-
}
1010-
else if (!Preheader->empty())
1011-
{
1012-
PrintDump("Checking loop with unnamed preheader. First preheader instruction:\n");
1013-
Instruction* First = &Preheader->front();
1014-
PrintInstructionDump(First);
1015-
}
1016-
else
1017-
{
1018-
PrintDump("Checking loop with unnamed empty preheader.");
1019-
}
1020-
10211027
PrintDump("Threshold to sink = " << NGRF + GRFThresholdDelta << "\n");
10221028
PrintDump("MaxLoopPressure = " << MaxLoopPressure << "\n");
10231029
PrintDump("MaxLoopPressure + FunctionExternalPressure = " << MaxLoopPressure + FunctionExternalPressure << "\n");
@@ -1453,7 +1459,7 @@ namespace IGC {
14531459
};
14541460

14551461
// Check if it's beneficial to sink it in the loop
1456-
auto isBeneficialToSink = [&](OperandUseGroup *OUG)-> bool
1462+
auto isBeneficialToSink = [&](OperandUseGroup &OUG)-> bool
14571463
{
14581464
auto getDstSize = [this](Value *V)
14591465
{
@@ -1472,10 +1478,8 @@ namespace IGC {
14721478
return DstSize;
14731479
};
14741480

1475-
IGC_ASSERT(OUG);
1476-
14771481
// All instructions are safe to sink always or consume larger type than produce
1478-
if (std::all_of(OUG->Users.begin(), OUG->Users.end(),
1482+
if (std::all_of(OUG.Users.begin(), OUG.Users.end(),
14791483
[this](Instruction *I)
14801484
{
14811485
return isAlwaysSinkInstruction(I) || isCastInstrReducingPressure(I, false);
@@ -1489,7 +1493,7 @@ namespace IGC {
14891493
// is uniform, but the User (instruction to sink) is uniform, we'll decide it's beneficial to sink
14901494
int AccSave = 0;
14911495

1492-
for (Value *V : OUG->Operands)
1496+
for (Value *V : OUG.Operands)
14931497
{
14941498
int DstSize = getDstSize(V);
14951499
if (!DstSize)
@@ -1500,7 +1504,7 @@ namespace IGC {
15001504
}
15011505

15021506
bool AllUsersAreUniform = true;
1503-
for (Value *V : OUG->Users)
1507+
for (Value *V : OUG.Users)
15041508
{
15051509
int DstSize = getDstSize(V);
15061510
if (!DstSize)
@@ -1513,15 +1517,15 @@ namespace IGC {
15131517

15141518
// If all uses are uniform, and we save enough SSA-values it's still beneficial
15151519
if (AccSave >= 0 && AllUsersAreUniform &&
1516-
((int)OUG->Users.size() - (int)OUG->Operands.size() >= (int)(IGC_GET_FLAG_VALUE(LoopSinkMinSaveUniform))))
1520+
((int)OUG.Users.size() - (int)OUG.Operands.size() >= (int)(IGC_GET_FLAG_VALUE(LoopSinkMinSaveUniform))))
15171521
{
15181522
return true;
15191523
}
15201524

15211525
// All instructions are part of a chain to already sinked load and don't
15221526
// increase pressure too much. It simplifies the code a little and without
15231527
// adding remat pass for simple cases
1524-
if (AccSave >= 0 && std::all_of(OUG->Users.begin(), OUG->Users.end(),
1528+
if (AccSave >= 0 && std::all_of(OUG.Users.begin(), OUG.Users.end(),
15251529
[&](Instruction *I) {return isLoadChain(I, LoadChains);}))
15261530
{
15271531
return true;
@@ -1563,111 +1567,91 @@ namespace IGC {
15631567
// Here we group all candidates based on its operands and select ones that definitely
15641568
// reduce the pressure.
15651569
//
1566-
OperandUseGroup *AllGroups = new OperandUseGroup[SinkCandidates.size()];
1567-
SmallVector<OperandUseGroup *, 16> InstUseInfo;
1568-
for (uint32_t i = 0, e = (uint32_t)SinkCandidates.size(); i < e; ++i)
1570+
1571+
SmallVector<OperandUseGroup, 16> InstUseInfo;
1572+
InstUseInfo.reserve(SinkCandidates.size());
1573+
1574+
for (Instruction *I : SinkCandidates)
15691575
{
1570-
Instruction *I = SinkCandidates[i];
1571-
SmallPtrSet<Value *, 4> theUses;
1576+
SmallPtrSet<Value *, 4> CandidateOperands;
15721577
for (Use &U : I->operands())
15731578
{
15741579
Value *V = U;
15751580
if (isa<Constant>(V) || isUsedInLoop(V, L))
15761581
continue;
15771582

1578-
theUses.insert(V);
1583+
CandidateOperands.insert(V);
15791584
}
15801585

15811586
// If this set of uses have been referenced by other instructions,
15821587
// put this inst in the same group. Note that we don't union sets
15831588
// that intersect each other.
1584-
uint32_t j, je = (uint32_t)InstUseInfo.size();
1585-
for (j = 0; j < je; ++j)
1589+
auto it = std::find_if(InstUseInfo.begin(), InstUseInfo.end(), [&](OperandUseGroup &OUG)
15861590
{
1587-
OperandUseGroup *OUG = InstUseInfo[j];
1588-
if (isSameSet(OUG->Operands, theUses)) {
1589-
OUG->Users.push_back(I);
1590-
break;
1591-
}
1592-
}
1591+
return isSameSet(OUG.Operands, CandidateOperands);
1592+
});
15931593

1594-
if (j == je) {
1595-
// No match found, create the new one.
1596-
OperandUseGroup &OUG = AllGroups[i];
1597-
OUG.Operands = std::move(theUses);
1598-
OUG.Users.push_back(I);
1599-
InstUseInfo.push_back(&OUG);
1600-
}
1594+
if (it != InstUseInfo.end())
1595+
it->Users.push_back(I);
1596+
else
1597+
InstUseInfo.push_back(OperandUseGroup{CandidateOperands, {I}});
16011598
}
16021599

1603-
bool EverChanged = false;
1604-
// Just a placeholder, all LIs considered here are ALUs.
1605-
SmallPtrSet<Instruction *, 16> Stores;
1606-
bool IterChanged;
1607-
uint32_t N = (uint32_t) InstUseInfo.size();
1608-
do {
1609-
IterChanged = false;
1610-
for (uint32_t i = 0; i < N; ++i)
1611-
{
1612-
OperandUseGroup *OUG = InstUseInfo[i];
1613-
if (!OUG)
1614-
continue;
1600+
// Sink the instructions from every group if they are beneficial
1601+
bool Changed = false;
1602+
for (OperandUseGroup &OUG : InstUseInfo)
1603+
{
16151604

1616-
PrintDump("Checking if sinking the group is beneficial:\n");
1617-
PrintOUGDump(OUG);
1605+
PrintDump("Checking if sinking the group is beneficial:\n");
1606+
PrintOUGDump(OUG);
16181607

1619-
if (!isBeneficialToSink(OUG))
1620-
continue;
1621-
PrintDump(">> Beneficial to sink.\n\n");
1608+
if (!isBeneficialToSink(OUG))
1609+
continue;
1610+
PrintDump(">> Beneficial to sink.\n\n");
16221611

1623-
bool GroupChanged = false;
1624-
for (int j = 0; j < (int)(OUG->Users.size()); ++j)
1612+
bool GroupChanged = false;
1613+
for (Instruction *I : OUG.Users)
1614+
{
1615+
Instruction *PrevLoc = I->getNextNode();
1616+
bool UserChanged = sinkInstruction(I);
1617+
if (UserChanged)
16251618
{
1626-
Instruction *I = OUG->Users[j];
1627-
Instruction *PrevLoc = I->getNextNode();
1628-
bool UserChanged = sinkInstruction(I);
1629-
if (UserChanged)
1630-
{
1631-
PrintDump("Sinking instruction:\n");
1632-
PrintInstructionDump(I);
1619+
PrintDump("Sinking instruction:\n");
1620+
PrintInstructionDump(I);
16331621

1634-
UndoLocas.push_back(PrevLoc);
1635-
MovedInsts.push_back(I);
1622+
UndoLocas.push_back(PrevLoc);
1623+
MovedInsts.push_back(I);
16361624

1637-
GroupChanged = true;
1638-
if (isa<LoadInst>(I) || isLoadChain(I, LoadChains))
1639-
{
1640-
LoadChains.insert(I);
1641-
}
1642-
}
1625+
GroupChanged = true;
1626+
if (isa<LoadInst>(I) || isLoadChain(I, LoadChains))
1627+
LoadChains.insert(I);
16431628
}
1644-
if (GroupChanged) {
1645-
IterChanged = true;
1646-
EverChanged = true;
1647-
1648-
// Since those operands become global already, remove
1649-
// them from the sets in the vector.
1650-
for (uint32_t k = 0; k < N; ++k)
1651-
{
1652-
OperandUseGroup *OUG1 = InstUseInfo[k];
1653-
if (k == i || !OUG1)
1654-
continue;
1629+
}
1630+
if (GroupChanged)
1631+
{
1632+
Changed = true;
1633+
1634+
// If the group is sinked, remove its operands from other groups
1635+
// so that the same operands are not considered in the next group's
1636+
// estimation of whether it's beneficial to sink the users.
1637+
//
1638+
// It's still useful if we don't sink all the users from the group, but sink at least one.
1639+
// Because if we sink, the operands of the sinked group become alive in the loop's body,
1640+
// so they should not be considered for the next group
1641+
for (OperandUseGroup &OUG1 : InstUseInfo)
1642+
{
1643+
// Just don't remove the operands from the same group
1644+
// so that we don't lose the operands set
1645+
if (&OUG1 == &OUG)
1646+
continue;
16551647

1656-
for (auto I : OUG->Operands) {
1657-
Value *V = I;
1658-
OUG1->Operands.erase(V);
1659-
}
1660-
}
1648+
for (Value *V : OUG.Operands)
1649+
OUG1.Operands.erase(V);
16611650
}
1662-
1663-
// Just set it to nullptr (erasing it would be more expensive).
1664-
InstUseInfo[i] = nullptr;
16651651
}
1666-
} while (IterChanged);
1667-
1668-
delete[] AllGroups;
1652+
}
16691653

1670-
return EverChanged;
1654+
return Changed;
16711655
}
16721656

16731657
// Find the target BB and move the instruction

IGC/Compiler/CISACodeGen/CodeSinking.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ namespace IGC {
154154
std::string Log;
155155
llvm::raw_string_ostream LogStream;
156156

157+
void dumpToFile(const std::string& Log);
158+
157159
// memoize all possible stores for every loop that is a candidate for sinking
158160
typedef llvm::SmallVector<llvm::Instruction*, 32> StoresVec;
159161
llvm::DenseMap<llvm::Loop*, StoresVec> MemoizedStoresInLoops;

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,8 @@ void AddAnalysisPasses(CodeGenContext& ctx, IGCPassManager& mpm)
245245
mpm.add(new CodeSinking());
246246
if ((IGC_IS_FLAG_DISABLED(DisableLoopSink) || IGC_IS_FLAG_ENABLED(ForceLoopSink))
247247
&& ctx.type == ShaderType::OPENCL_SHADER
248-
&& ctx.m_instrTypes.numOfLoop > 0)
248+
&& ctx.m_instrTypes.numOfLoop > 0
249+
&& ctx.m_instrTypes.numInsts >= IGC_GET_FLAG_VALUE(CodeLoopSinkingMinSize))
249250
{
250251
mpm.add(new CodeLoopSinking());
251252
}

IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking-all-uniform.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys
9-
; RUN: igc_opt --regkey LoopSinkMinSave=1 --regkey LoopSinkMinSaveUniform=3 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-wi-analysis --igc-code-loop-sinking -S %s | FileCheck %s
9+
; RUN: igc_opt --regkey LoopSinkMinSave=1 --regkey LoopSinkMinSaveUniform=3 --regkey ForceLoopSink=1 --regkey CodeLoopSinkingMinSize=10 %enable-basic-aa% --igc-wi-analysis --igc-code-loop-sinking -S %s | FileCheck %s
1010
; We set LoopSinkMinSaveUniform=3, and check that only the case with saving 3 scalars is being sinked, when all the values are uniform
1111
define spir_kernel void @foo(float addrspace(1)* %in0, float addrspace(1)* %in1, float addrspace(1)* %out0, i32 %count, i16 %localIdX, i16 %localIdY, i16 %localIdZ) #0 {
1212
; CHECK-LABEL: @foo(

IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking-uniform.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys
9-
; RUN: igc_opt --regkey LoopSinkMinSave=1 --regkey ForceLoadsLoopSink=1 --regkey LoopSinkMinSaveUniform=10 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-wi-analysis --igc-code-loop-sinking -S %s | FileCheck %s
9+
; RUN: igc_opt --regkey LoopSinkMinSave=1 --regkey ForceLoadsLoopSink=1 --regkey LoopSinkMinSaveUniform=10 --regkey ForceLoopSink=1 --regkey CodeLoopSinkingMinSize=10 %enable-basic-aa% --igc-wi-analysis --igc-code-loop-sinking -S %s | FileCheck %s
1010
; We set LoopSinkMinSaveUniform=10, but in this test uniform vs non-uniform results in sinking
1111
define spir_kernel void @foo(float addrspace(1)* %in0, float addrspace(1)* %in1, float addrspace(1)* %out0, i32 %count, i16 %localIdX, i16 %localIdY, i16 %localIdZ) #0 {
1212
; CHECK-LABEL: @foo(

IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys
9-
; RUN: igc_opt --regkey LoopSinkMinSave=4 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-code-sinking --igc-code-loop-sinking -S %s | FileCheck %s
9+
; RUN: igc_opt --regkey LoopSinkMinSave=4 --regkey ForceLoopSink=1 --regkey CodeLoopSinkingMinSize=10 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-code-sinking --igc-code-loop-sinking -S %s | FileCheck %s
1010
define void @foo(float addrspace(1)* %in0, double addrspace(1)* %in1, float addrspace(1)* noalias %out0, i32 %count, i32 %offsetIn0, i32 %offsetIn2) {
1111
; CHECK-LABEL: @foo(
1212
; CHECK: entry:

0 commit comments

Comments
 (0)