@@ -1702,6 +1702,8 @@ namespace {
1702
1702
// / Models excess register pressure in a region and tracks our progress as we
1703
1703
// / identify rematerialization opportunities.
1704
1704
struct ExcessRP {
1705
+ // / Number of excess SGPRs.
1706
+ unsigned SGPRs = 0 ;
1705
1707
// / Number of excess ArchVGPRs.
1706
1708
unsigned ArchVGPRs = 0 ;
1707
1709
// / Number of excess AGPRs.
@@ -1717,26 +1719,34 @@ struct ExcessRP {
1717
1719
bool UnifiedRF;
1718
1720
1719
1721
// / Constructs the excess RP model; determines the excess pressure w.r.t. a
1720
- // / maximum number of allowed VGPRs.
1721
- ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
1722
+ // / maximum number of allowed SGPRs/VGPRs.
1723
+ ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
1724
+ unsigned MaxVGPRs);
1725
+
1726
+ // / Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
1727
+ // / these SGPRs helped reduce excess pressure.
1728
+ bool saveSGPRs (unsigned NumRegs) { return saveRegs (SGPRs, NumRegs); }
1722
1729
1723
1730
// / Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
1724
1731
// / UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
1725
1732
// / AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
1726
1733
// / saving these ArchVGPRs helped reduce excess pressure.
1727
1734
bool saveArchVGPRs (unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
1728
1735
1729
- // / Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
1730
- // / these ArchVGPRs helped reduce excess pressure.
1731
- bool saveAGPRs (unsigned NumRegs);
1736
+ // / Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
1737
+ // / these AGPRs helped reduce excess pressure.
1738
+ bool saveAGPRs (unsigned NumRegs) {
1739
+ return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1740
+ }
1732
1741
1733
1742
// / Returns whether there is any excess register pressure.
1734
- operator bool () const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0 ; }
1743
+ operator bool () const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
1735
1744
1736
1745
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1737
1746
friend raw_ostream &operator <<(raw_ostream &OS, const ExcessRP &Excess) {
1738
- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
1739
- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
1747
+ OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
1748
+ << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
1749
+ << " VGPRs in total, next ArchVGPR aligment in "
1740
1750
<< Excess.ArchVGPRsToAlignment << " registers)\n " ;
1741
1751
return OS;
1742
1752
}
@@ -1753,12 +1763,17 @@ struct ExcessRP {
1753
1763
} // namespace
1754
1764
1755
1765
ExcessRP::ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP,
1756
- unsigned MaxVGPRs)
1766
+ unsigned MaxSGPRs, unsigned MaxVGPRs)
1757
1767
: UnifiedRF(ST.hasGFX90AInsts()) {
1768
+ // Compute excess SGPR pressure.
1769
+ unsigned NumSGPRs = RP.getSGPRNum ();
1770
+ if (NumSGPRs > MaxSGPRs)
1771
+ SGPRs = NumSGPRs - MaxSGPRs;
1772
+
1773
+ // Compute excess ArchVGPR/AGPR pressure.
1758
1774
unsigned NumArchVGPRs = RP.getArchVGPRNum ();
1759
1775
unsigned NumAGPRs = RP.getAGPRNum ();
1760
1776
HasAGPRs = NumAGPRs;
1761
-
1762
1777
if (!UnifiedRF) {
1763
1778
// Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
1764
1779
// independently.
@@ -1839,10 +1854,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
1839
1854
return Progress;
1840
1855
}
1841
1856
1842
- bool ExcessRP::saveAGPRs (unsigned NumRegs) {
1843
- return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1844
- }
1845
-
1846
1857
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
1847
1858
const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo *>(DAG.TRI );
1848
1859
@@ -1865,46 +1876,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1865
1876
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 );
1866
1877
IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
1867
1878
1868
- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
1869
- if (Cond) {
1870
- // We won't try to increase occupancy.
1871
- IncreaseOccupancy = false ;
1872
- OptRegions.clear ();
1873
- }
1874
- return Cond;
1875
- };
1876
-
1877
1879
// Collect optimizable regions. If there is spilling in any region we will
1878
- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
1879
- // occupancy by one in the whole function.
1880
+ // just try to reduce spilling. Otherwise we will try to increase occupancy by
1881
+ // one in the whole function.
1880
1882
for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1881
1883
GCNRegPressure &RP = DAG.Pressure [I];
1882
-
1883
- // Check whether SGPR pressures prevents us from eliminating spilling.
1884
- unsigned NumSGPRs = RP.getSGPRNum ();
1885
- if (NumSGPRs > MaxSGPRsNoSpill)
1886
- ClearOptRegionsIf (IncreaseOccupancy);
1887
-
1888
- ExcessRP Excess (ST, RP, MaxVGPRsNoSpill);
1889
- if (Excess) {
1890
- ClearOptRegionsIf (IncreaseOccupancy);
1884
+ ExcessRP Excess (ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
1885
+ if (Excess && IncreaseOccupancy) {
1886
+ // There is spilling in the region and we were so far trying to increase
1887
+ // occupancy. Stop trying that and focus on reducing spilling.
1888
+ IncreaseOccupancy = false ;
1889
+ OptRegions.clear ();
1891
1890
} else if (IncreaseOccupancy) {
1892
- // Check whether SGPR pressure prevents us from increasing occupancy.
1893
- if (ClearOptRegionsIf (NumSGPRs > MaxSGPRsIncOcc)) {
1894
- if (DAG.MinOccupancy >= WavesPerEU.first )
1895
- return false ;
1896
- continue ;
1897
- }
1898
- if ((Excess = ExcessRP (ST, RP, MaxVGPRsIncOcc))) {
1899
- // We can only rematerialize ArchVGPRs at this point.
1900
- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs ;
1901
- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum ();
1902
- if (ClearOptRegionsIf (Excess.AGPRs || NotEnoughArchVGPRs)) {
1903
- if (DAG.MinOccupancy >= WavesPerEU.first )
1904
- return false ;
1905
- continue ;
1906
- }
1907
- }
1891
+ Excess = ExcessRP (ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
1908
1892
}
1909
1893
if (Excess)
1910
1894
OptRegions.insert ({I, Excess});
@@ -1924,23 +1908,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1924
1908
#endif
1925
1909
1926
1910
// When we are reducing spilling, the target is the minimum target number of
1927
- // waves/EU determined by the subtarget.
1928
- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first ;
1911
+ // waves/EU determined by the subtarget. In cases where either one of
1912
+ // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" is set on the function, the current
1913
+ // minimum region occupancy may be higher than the latter.
1914
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1915
+ : std::max (DAG.MinOccupancy , WavesPerEU.first );
1929
1916
1930
1917
// Accounts for a reduction in RP in an optimizable region. Returns whether we
1931
1918
// estimate that we have identified enough rematerialization opportunities to
1932
1919
// achieve our goal, and sets Progress to true when this particular reduction
1933
1920
// in pressure was helpful toward that goal.
1934
1921
auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
1922
+ const TargetRegisterClass *RC,
1935
1923
bool &Progress) -> bool {
1936
1924
ExcessRP &Excess = OptIt->getSecond ();
1937
- // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1938
- // only when we are just trying to eliminate spilling to memory. At this
1939
- // point we err on the conservative side and do not increase
1940
- // register-to-register spilling for the sake of increasing occupancy.
1941
- Progress |=
1942
- Excess.saveArchVGPRs (SIRegisterInfo::getNumCoveredRegs (Mask),
1943
- /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1925
+ unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs (Mask);
1926
+ if (SRI->isSGPRClass (RC)) {
1927
+ Progress |= Excess.saveSGPRs (NumRegs);
1928
+ } else if (SRI->isAGPRClass (RC)) {
1929
+ Progress |= Excess.saveAGPRs (NumRegs);
1930
+ } else {
1931
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1932
+ // only when we are just trying to eliminate spilling to memory. At this
1933
+ // point we err on the conservative side and do not increase
1934
+ // register-to-register spilling for the sake of increasing occupancy.
1935
+ Progress |=
1936
+ Excess.saveArchVGPRs (NumRegs,
1937
+ /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1938
+ }
1944
1939
if (!Excess)
1945
1940
OptRegions.erase (OptIt->getFirst ());
1946
1941
return OptRegions.empty ();
@@ -1962,10 +1957,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1962
1957
if (!isTriviallyReMaterializable (DefMI))
1963
1958
continue ;
1964
1959
1965
- // We only support rematerializing virtual VGPRs with one definition.
1960
+ // We only support rematerializing virtual registers with one definition.
1966
1961
Register Reg = DefMI.getOperand (0 ).getReg ();
1967
- if (!Reg.isVirtual () || !SRI->isVGPRClass (DAG.MRI .getRegClass (Reg)) ||
1968
- !DAG.MRI .hasOneDef (Reg))
1962
+ if (!Reg.isVirtual () || !DAG.MRI .hasOneDef (Reg))
1969
1963
continue ;
1970
1964
1971
1965
// We only care to rematerialize the instruction if it has a single
@@ -2003,14 +1997,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
2003
1997
Rematerializations.try_emplace (&DefMI, UseMI).first ->second ;
2004
1998
2005
1999
bool RematUseful = false ;
2000
+ const TargetRegisterClass *RC = DAG.MRI .getRegClass (Reg);
2006
2001
if (auto It = OptRegions.find (I); It != OptRegions.end ()) {
2007
2002
// Optimistically consider that moving the instruction out of its
2008
2003
// defining region will reduce RP in the latter; this assumes that
2009
2004
// maximum RP in the region is reached somewhere between the defining
2010
2005
// instruction and the end of the region.
2011
2006
REMAT_DEBUG (dbgs () << " Defining region is optimizable\n " );
2012
2007
LaneBitmask Mask = DAG.RegionLiveOuts .getLiveRegsForRegionIdx (I)[Reg];
2013
- if (ReduceRPInRegion (It, Mask, RematUseful))
2008
+ if (ReduceRPInRegion (It, Mask, RC, RematUseful))
2014
2009
return true ;
2015
2010
}
2016
2011
@@ -2030,7 +2025,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
2030
2025
// instruction's use.
2031
2026
if (auto It = OptRegions.find (LIRegion); It != OptRegions.end ()) {
2032
2027
REMAT_DEBUG (dbgs () << " Live-in in region " << LIRegion << ' \n ' );
2033
- if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RematUseful))
2028
+ if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RC, RematUseful))
2034
2029
return true ;
2035
2030
}
2036
2031
}
0 commit comments