@@ -815,7 +815,7 @@ namespace IGC {
815
815
// Helper functions for loop sink debug dumps
816
816
// Each macro below expands to a bare `if` on the DumpLoopSink flag whose body writes to LogStream.
// NOTE(review): the expansions are not wrapped in do { } while (0); an unbraced use as the
// body of an `if` that has an `else` will either fail to compile or attach that `else` to
// the macro's own `if`, so keep call sites inside braces.
// PrintDump: streams Contents (any `<<`-chainable expression) into LogStream.
#define PrintDump (Contents ) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {LogStream << Contents;}
817
817
// PrintInstructionDump: calls Inst->print(LogStream, false), then streams the trailing string literal.
#define PrintInstructionDump (Inst ) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {Inst->print (LogStream, false ); LogStream << " \n " ;}
818
- #define PrintOUGDump (OUG ) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG-> print (LogStream); LogStream << " \n " ;}
818
// PrintOUGDump: after this change the argument is used as an object (OUG.print) rather than
// through a pointer (OUG->print), then the trailing string literal is streamed.
+ #define PrintOUGDump (OUG ) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG. print (LogStream); LogStream << " \n " ;}
819
819
820
820
821
821
// Register pass to igc-opt
@@ -851,7 +851,7 @@ namespace IGC {
851
851
return false ;
852
852
853
853
if (IGC_IS_FLAG_ENABLED (DisableCodeSinking) ||
854
- numInsts (F) < IGC_GET_FLAG_VALUE (CodeSinkingMinSize ))
854
+ numInsts (F) < IGC_GET_FLAG_VALUE (CodeLoopSinkingMinSize ))
855
855
{
856
856
return false ;
857
857
}
@@ -900,31 +900,30 @@ namespace IGC {
900
900
if (IGC_IS_FLAG_ENABLED (DumpLoopSink))
901
901
{
902
902
if (IGC_IS_FLAG_ENABLED (PrintToConsole))
903
- {
904
903
IGC::Debug::ods () << Log;
905
- }
906
904
else
907
- {
908
- auto Name = Debug::DumpName (IGC::Debug::GetShaderOutputName ())
909
- .Hash (CTX->hash )
910
- .Type (CTX->type )
911
- .Retry (CTX->m_retryManager .GetRetryId ())
912
- .Pass (" loopsink" )
913
- .Extension (" txt" );
914
- IGC::Debug::DumpLock ();
915
- std::ofstream OutputFile (Name.str (), std::ios_base::app);
916
- if (OutputFile.is_open ())
917
- {
918
- OutputFile << Log;
919
- }
920
- OutputFile.close ();
921
- IGC::Debug::DumpUnlock ();
922
- }
905
+ dumpToFile (Log);
923
906
}
924
907
925
908
return Changed;
926
909
}
927
910
911
// Appends Log to a text dump file for the loop-sink pass.
// The file name is built from the shader output name plus the context's hash, type and
// retry id, with the pass and extension components set below.
// Log: the text to append; it is written only if the file could be opened.
// NOTE(review): DumpLock/DumpUnlock are paired by hand rather than through a scope guard;
// presumably they serialize dump writers, confirm against IGC::Debug.
+ void CodeLoopSinking::dumpToFile (const std::string& Log)
912
+ {
913
+ auto Name = Debug::DumpName (IGC::Debug::GetShaderOutputName ())
914
+ .Hash (CTX->hash )
915
+ .Type (CTX->type )
916
+ .Retry (CTX->m_retryManager .GetRetryId ())
917
+ .Pass (" loopsink" )
918
+ .Extension (" txt" );
919
+ IGC::Debug::DumpLock (); // held across the open / append / close sequence below
920
+ std::ofstream OutputFile (Name.str (), std::ios_base::app); // append mode: earlier contents of the file are kept
921
+ if (OutputFile.is_open ()) // a failed open is ignored silently; nothing is written
922
+ OutputFile << Log;
923
+ OutputFile.close (); // closed explicitly before the lock is released
924
+ IGC::Debug::DumpUnlock ();
925
+ }
926
+
928
927
// Implementation of RPE->getMaxRegCountForLoop(*L, SIMD);
929
928
// with per-BB pressure caching to improve compile-time
930
929
uint CodeLoopSinking::getMaxRegCountForLoop (Loop *L)
@@ -974,6 +973,22 @@ namespace IGC {
974
973
uint NGRF = CTX->getNumGRFPerThread ();
975
974
uint SIMD = numLanes (RPE->bestGuessSIMDSize ());
976
975
976
+ PrintDump (" \n " );
977
+ if (!Preheader->getName ().empty ())
978
+ {
979
+ PrintDump (" Checking loop with preheader " << Preheader->getName () << " : \n " );
980
+ }
981
+ else if (!Preheader->empty ())
982
+ {
983
+ PrintDump (" Checking loop with unnamed preheader. First preheader instruction:\n " );
984
+ Instruction* First = &Preheader->front ();
985
+ PrintInstructionDump (First);
986
+ }
987
+ else
988
+ {
989
+ PrintDump (" Checking loop with unnamed empty preheader." );
990
+ }
991
+
977
992
// Estimate preheader's potential to sink
978
993
ValueSet PreheaderDefs = RPE->getDefs (*Preheader);
979
994
// Filter out preheader defined values that are used not in the loop or not supported
@@ -987,6 +1002,13 @@ namespace IGC {
987
1002
PreheaderDefsCandidates.insert (V);
988
1003
}
989
1004
}
1005
+
1006
+ if (PreheaderDefsCandidates.empty ())
1007
+ {
1008
+ PrintDump (" >> No sinking candidates in the preheader.\n " );
1009
+ return LoopSinkMode::NoSink;
1010
+ }
1011
+
990
1012
uint PreheaderDefsSizeInBytes = RPE->estimateSizeInBytes (PreheaderDefsCandidates, *F, SIMD, &WI);
991
1013
uint PreheaderDefsSizeInRegs = RPE->bytesToRegisters (PreheaderDefsSizeInBytes);
992
1014
@@ -1002,22 +1024,6 @@ namespace IGC {
1002
1024
(PreheaderDefsSizeInRegs > (MaxLoopPressure - NGRF) * LOOPSINK_PREHEADER_IMPACT_THRESHOLD));
1003
1025
};
1004
1026
1005
- PrintDump (" \n " );
1006
- if (!Preheader->getName ().empty ())
1007
- {
1008
- PrintDump (" Checking loop with preheader " << Preheader->getName () << " : \n " );
1009
- }
1010
- else if (!Preheader->empty ())
1011
- {
1012
- PrintDump (" Checking loop with unnamed preheader. First preheader instruction:\n " );
1013
- Instruction* First = &Preheader->front ();
1014
- PrintInstructionDump (First);
1015
- }
1016
- else
1017
- {
1018
- PrintDump (" Checking loop with unnamed empty preheader." );
1019
- }
1020
-
1021
1027
PrintDump (" Threshold to sink = " << NGRF + GRFThresholdDelta << " \n " );
1022
1028
PrintDump (" MaxLoopPressure = " << MaxLoopPressure << " \n " );
1023
1029
PrintDump (" MaxLoopPressure + FunctionExternalPressure = " << MaxLoopPressure + FunctionExternalPressure << " \n " );
@@ -1453,7 +1459,7 @@ namespace IGC {
1453
1459
};
1454
1460
1455
1461
// Check if it's beneficial to sink it in the loop
1456
- auto isBeneficialToSink = [&](OperandUseGroup * OUG)-> bool
1462
+ auto isBeneficialToSink = [&](OperandUseGroup & OUG)-> bool
1457
1463
{
1458
1464
auto getDstSize = [this ](Value *V)
1459
1465
{
@@ -1472,10 +1478,8 @@ namespace IGC {
1472
1478
return DstSize;
1473
1479
};
1474
1480
1475
- IGC_ASSERT (OUG);
1476
-
1477
1481
// All instructions are safe to sink always or consume larger type than produce
1478
- if (std::all_of (OUG-> Users .begin (), OUG-> Users .end (),
1482
+ if (std::all_of (OUG. Users .begin (), OUG. Users .end (),
1479
1483
[this ](Instruction *I)
1480
1484
{
1481
1485
return isAlwaysSinkInstruction (I) || isCastInstrReducingPressure (I, false );
@@ -1489,7 +1493,7 @@ namespace IGC {
1489
1493
// is uniform, but the User (instruction to sink) is uniform, we'll decide it's beneficial to sink
1490
1494
int AccSave = 0 ;
1491
1495
1492
- for (Value *V : OUG-> Operands )
1496
+ for (Value *V : OUG. Operands )
1493
1497
{
1494
1498
int DstSize = getDstSize (V);
1495
1499
if (!DstSize)
@@ -1500,7 +1504,7 @@ namespace IGC {
1500
1504
}
1501
1505
1502
1506
bool AllUsersAreUniform = true ;
1503
- for (Value *V : OUG-> Users )
1507
+ for (Value *V : OUG. Users )
1504
1508
{
1505
1509
int DstSize = getDstSize (V);
1506
1510
if (!DstSize)
@@ -1513,15 +1517,15 @@ namespace IGC {
1513
1517
1514
1518
// If all uses are uniform, and we save enough SSA-values it's still beneficial
1515
1519
if (AccSave >= 0 && AllUsersAreUniform &&
1516
- ((int )OUG-> Users .size () - (int )OUG-> Operands .size () >= (int )(IGC_GET_FLAG_VALUE (LoopSinkMinSaveUniform))))
1520
+ ((int )OUG. Users .size () - (int )OUG. Operands .size () >= (int )(IGC_GET_FLAG_VALUE (LoopSinkMinSaveUniform))))
1517
1521
{
1518
1522
return true ;
1519
1523
}
1520
1524
1521
1525
// All instructions are part of a chain to already sinked load and don't
1522
1526
// increase pressure too much. It simplifies the code a little and without
1523
1527
// adding remat pass for simple cases
1524
- if (AccSave >= 0 && std::all_of (OUG-> Users .begin (), OUG-> Users .end (),
1528
+ if (AccSave >= 0 && std::all_of (OUG. Users .begin (), OUG. Users .end (),
1525
1529
[&](Instruction *I) {return isLoadChain (I, LoadChains);}))
1526
1530
{
1527
1531
return true ;
@@ -1563,111 +1567,91 @@ namespace IGC {
1563
1567
// Here we group all candidates based on its operands and select ones that definitely
1564
1568
// reduce the pressure.
1565
1569
//
1566
- OperandUseGroup *AllGroups = new OperandUseGroup[SinkCandidates.size ()];
1567
- SmallVector<OperandUseGroup *, 16 > InstUseInfo;
1568
- for (uint32_t i = 0 , e = (uint32_t )SinkCandidates.size (); i < e; ++i)
1570
+
1571
+ SmallVector<OperandUseGroup, 16 > InstUseInfo;
1572
+ InstUseInfo.reserve (SinkCandidates.size ());
1573
+
1574
+ for (Instruction *I : SinkCandidates)
1569
1575
{
1570
- Instruction *I = SinkCandidates[i];
1571
- SmallPtrSet<Value *, 4 > theUses;
1576
+ SmallPtrSet<Value *, 4 > CandidateOperands;
1572
1577
for (Use &U : I->operands ())
1573
1578
{
1574
1579
Value *V = U;
1575
1580
if (isa<Constant>(V) || isUsedInLoop (V, L))
1576
1581
continue ;
1577
1582
1578
- theUses .insert (V);
1583
+ CandidateOperands .insert (V);
1579
1584
}
1580
1585
1581
1586
// If this set of uses have been referenced by other instructions,
1582
1587
// put this inst in the same group. Note that we don't union sets
1583
1588
// that intersect each other.
1584
- uint32_t j, je = (uint32_t )InstUseInfo.size ();
1585
- for (j = 0 ; j < je; ++j)
1589
+ auto it = std::find_if (InstUseInfo.begin (), InstUseInfo.end (), [&](OperandUseGroup &OUG)
1586
1590
{
1587
- OperandUseGroup *OUG = InstUseInfo[j];
1588
- if (isSameSet (OUG->Operands , theUses)) {
1589
- OUG->Users .push_back (I);
1590
- break ;
1591
- }
1592
- }
1591
+ return isSameSet (OUG.Operands , CandidateOperands);
1592
+ });
1593
1593
1594
- if (j == je) {
1595
- // No match found, create the new one.
1596
- OperandUseGroup &OUG = AllGroups[i];
1597
- OUG.Operands = std::move (theUses);
1598
- OUG.Users .push_back (I);
1599
- InstUseInfo.push_back (&OUG);
1600
- }
1594
+ if (it != InstUseInfo.end ())
1595
+ it->Users .push_back (I);
1596
+ else
1597
+ InstUseInfo.push_back (OperandUseGroup{CandidateOperands, {I}});
1601
1598
}
1602
1599
1603
- bool EverChanged = false ;
1604
- // Just a placeholder, all LIs considered here are ALUs.
1605
- SmallPtrSet<Instruction *, 16 > Stores;
1606
- bool IterChanged;
1607
- uint32_t N = (uint32_t ) InstUseInfo.size ();
1608
- do {
1609
- IterChanged = false ;
1610
- for (uint32_t i = 0 ; i < N; ++i)
1611
- {
1612
- OperandUseGroup *OUG = InstUseInfo[i];
1613
- if (!OUG)
1614
- continue ;
1600
+ // Sink the instructions from every group if they are beneficial
1601
+ bool Changed = false ;
1602
+ for (OperandUseGroup &OUG : InstUseInfo)
1603
+ {
1615
1604
1616
- PrintDump (" Checking if sinking the group is beneficial:\n " );
1617
- PrintOUGDump (OUG);
1605
+ PrintDump (" Checking if sinking the group is beneficial:\n " );
1606
+ PrintOUGDump (OUG);
1618
1607
1619
- if (!isBeneficialToSink (OUG))
1620
- continue ;
1621
- PrintDump (" >> Beneficial to sink.\n\n " );
1608
+ if (!isBeneficialToSink (OUG))
1609
+ continue ;
1610
+ PrintDump (" >> Beneficial to sink.\n\n " );
1622
1611
1623
- bool GroupChanged = false ;
1624
- for (int j = 0 ; j < (int )(OUG->Users .size ()); ++j)
1612
+ bool GroupChanged = false ;
1613
+ for (Instruction *I : OUG.Users )
1614
+ {
1615
+ Instruction *PrevLoc = I->getNextNode ();
1616
+ bool UserChanged = sinkInstruction (I);
1617
+ if (UserChanged)
1625
1618
{
1626
- Instruction *I = OUG->Users [j];
1627
- Instruction *PrevLoc = I->getNextNode ();
1628
- bool UserChanged = sinkInstruction (I);
1629
- if (UserChanged)
1630
- {
1631
- PrintDump (" Sinking instruction:\n " );
1632
- PrintInstructionDump (I);
1619
+ PrintDump (" Sinking instruction:\n " );
1620
+ PrintInstructionDump (I);
1633
1621
1634
- UndoLocas.push_back (PrevLoc);
1635
- MovedInsts.push_back (I);
1622
+ UndoLocas.push_back (PrevLoc);
1623
+ MovedInsts.push_back (I);
1636
1624
1637
- GroupChanged = true ;
1638
- if (isa<LoadInst>(I) || isLoadChain (I, LoadChains))
1639
- {
1640
- LoadChains.insert (I);
1641
- }
1642
- }
1625
+ GroupChanged = true ;
1626
+ if (isa<LoadInst>(I) || isLoadChain (I, LoadChains))
1627
+ LoadChains.insert (I);
1643
1628
}
1644
- if (GroupChanged) {
1645
- IterChanged = true ;
1646
- EverChanged = true ;
1647
-
1648
- // Since those operands become global already, remove
1649
- // them from the sets in the vector.
1650
- for (uint32_t k = 0 ; k < N; ++k)
1651
- {
1652
- OperandUseGroup *OUG1 = InstUseInfo[k];
1653
- if (k == i || !OUG1)
1654
- continue ;
1629
+ }
1630
+ if (GroupChanged)
1631
+ {
1632
+ Changed = true ;
1633
+
1634
+ // If the group is sinked, remove its operands from other groups
1635
+ // so that the same operands are not considered in the next group's
1636
+ // estimation of whether it's beneficial to sink the users.
1637
+ //
1638
+ // It's still useful if we don't sink all the users from the group, but sink at least one.
1639
+ // Because if we sink, the operands of the sinked group become alive in the loop's body,
1640
+ // so they should not be considered for the next group
1641
+ for (OperandUseGroup &OUG1 : InstUseInfo)
1642
+ {
1643
+ // Just don't remove the operands from the same group
1644
+ // so that we don't lose the operands set
1645
+ if (&OUG1 == &OUG)
1646
+ continue ;
1655
1647
1656
- for (auto I : OUG->Operands ) {
1657
- Value *V = I;
1658
- OUG1->Operands .erase (V);
1659
- }
1660
- }
1648
+ for (Value *V : OUG.Operands )
1649
+ OUG1.Operands .erase (V);
1661
1650
}
1662
-
1663
- // Just set it to nullptr (erasing it would be more expensive).
1664
- InstUseInfo[i] = nullptr ;
1665
1651
}
1666
- } while (IterChanged);
1667
-
1668
- delete[] AllGroups;
1652
+ }
1669
1653
1670
- return EverChanged ;
1654
+ return Changed ;
1671
1655
}
1672
1656
1673
1657
// Find the target BB and move the instruction
0 commit comments