@@ -1690,6 +1690,8 @@ namespace {
1690
1690
// / Models excess register pressure in a region and tracks our progress as we
1691
1691
// / identify rematerialization opportunities.
1692
1692
struct ExcessRP {
1693
+ // / Number of excess SGPRs.
1694
+ unsigned SGPRs = 0 ;
1693
1695
// / Number of excess ArchVGPRs.
1694
1696
unsigned ArchVGPRs = 0 ;
1695
1697
// / Number of excess AGPRs.
@@ -1705,26 +1707,34 @@ struct ExcessRP {
1705
1707
bool UnifiedRF;
1706
1708
1707
1709
// / Constructs the excess RP model; determines the excess pressure w.r.t. a
1708
- // / maximum number of allowed VGPRs.
1709
- ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
1710
+ // / maximum number of allowed SGPRs/VGPRs.
1711
+ ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
1712
+ unsigned MaxVGPRs);
1713
+
1714
+ // / Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
1715
+ // / these SGPRs helped reduce excess pressure.
1716
+ bool saveSGPRs (unsigned NumRegs) { return saveRegs (SGPRs, NumRegs); }
1710
1717
1711
1718
// / Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
1712
1719
// / UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
1713
1720
// / AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
1714
1721
// / saving these ArchVGPRs helped reduce excess pressure.
1715
1722
bool saveArchVGPRs (unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
1716
1723
1717
- // / Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
1718
- // / these ArchVGPRs helped reduce excess pressure.
1719
- bool saveAGPRs (unsigned NumRegs);
1724
+ // / Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
1725
+ // / these AGPRs helped reduce excess pressure.
1726
+ bool saveAGPRs (unsigned NumRegs) {
1727
+ return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1728
+ }
1720
1729
1721
1730
// / Returns whether there is any excess register pressure.
1722
- operator bool () const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0 ; }
1731
+ operator bool () const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
1723
1732
1724
1733
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1725
1734
friend raw_ostream &operator <<(raw_ostream &OS, const ExcessRP &Excess) {
1726
- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
1727
- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
1735
+ OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
1736
+ << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
1737
+ << " VGPRs in total, next ArchVGPR aligment in "
1728
1738
<< Excess.ArchVGPRsToAlignment << " registers)\n " ;
1729
1739
return OS;
1730
1740
}
@@ -1741,12 +1751,17 @@ struct ExcessRP {
1741
1751
} // namespace
1742
1752
1743
1753
ExcessRP::ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP,
1744
- unsigned MaxVGPRs)
1754
+ unsigned MaxSGPRs, unsigned MaxVGPRs)
1745
1755
: UnifiedRF(ST.hasGFX90AInsts()) {
1756
+ // Compute excess SGPR pressure.
1757
+ unsigned NumSGPRs = RP.getSGPRNum ();
1758
+ if (NumSGPRs > MaxSGPRs)
1759
+ SGPRs = NumSGPRs - MaxSGPRs;
1760
+
1761
+ // Compute excess ArchVGPR/AGPR pressure.
1746
1762
unsigned NumArchVGPRs = RP.getArchVGPRNum ();
1747
1763
unsigned NumAGPRs = RP.getAGPRNum ();
1748
1764
HasAGPRs = NumAGPRs;
1749
-
1750
1765
if (!UnifiedRF) {
1751
1766
// Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
1752
1767
// independently.
@@ -1827,10 +1842,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
1827
1842
return Progress;
1828
1843
}
1829
1844
1830
- bool ExcessRP::saveAGPRs (unsigned NumRegs) {
1831
- return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1832
- }
1833
-
1834
1845
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
1835
1846
const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo *>(DAG.TRI );
1836
1847
@@ -1853,46 +1864,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1853
1864
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 );
1854
1865
IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
1855
1866
1856
- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
1857
- if (Cond) {
1858
- // We won't try to increase occupancy.
1859
- IncreaseOccupancy = false ;
1860
- OptRegions.clear ();
1861
- }
1862
- return Cond;
1863
- };
1864
-
1865
1867
// Collect optimizable regions. If there is spilling in any region we will
1866
- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
1867
- // occupancy by one in the whole function.
1868
+ // just try to reduce spilling. Otherwise we will try to increase occupancy by
1869
+ // one in the whole function.
1868
1870
for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1869
1871
GCNRegPressure &RP = DAG.Pressure [I];
1870
-
1871
- // Check whether SGPR pressures prevents us from eliminating spilling.
1872
- unsigned NumSGPRs = RP.getSGPRNum ();
1873
- if (NumSGPRs > MaxSGPRsNoSpill)
1874
- ClearOptRegionsIf (IncreaseOccupancy);
1875
-
1876
- ExcessRP Excess (ST, RP, MaxVGPRsNoSpill);
1877
- if (Excess) {
1878
- ClearOptRegionsIf (IncreaseOccupancy);
1872
+ ExcessRP Excess (ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
1873
+ if (Excess && IncreaseOccupancy) {
1874
+ // There is spilling in the region and we were so far trying to increase
1875
+ // occupancy. Stop trying that and focus on reducing spilling.
1876
+ IncreaseOccupancy = false ;
1877
+ OptRegions.clear ();
1879
1878
} else if (IncreaseOccupancy) {
1880
- // Check whether SGPR pressure prevents us from increasing occupancy.
1881
- if (ClearOptRegionsIf (NumSGPRs > MaxSGPRsIncOcc)) {
1882
- if (DAG.MinOccupancy >= WavesPerEU.first )
1883
- return false ;
1884
- continue ;
1885
- }
1886
- if ((Excess = ExcessRP (ST, RP, MaxVGPRsIncOcc))) {
1887
- // We can only rematerialize ArchVGPRs at this point.
1888
- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs ;
1889
- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum ();
1890
- if (ClearOptRegionsIf (Excess.AGPRs || NotEnoughArchVGPRs)) {
1891
- if (DAG.MinOccupancy >= WavesPerEU.first )
1892
- return false ;
1893
- continue ;
1894
- }
1895
- }
1879
+ Excess = ExcessRP (ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
1896
1880
}
1897
1881
if (Excess)
1898
1882
OptRegions.insert ({I, Excess});
@@ -1912,23 +1896,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1912
1896
#endif
1913
1897
1914
1898
// When we are reducing spilling, the target is the minimum target number of
1915
- // waves/EU determined by the subtarget.
1916
- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first ;
1899
+ // waves/EU determined by the subtarget. In cases where either one of
1900
+ // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" is set on the function, the current
1901
+ // minimum region occupancy may be higher than the latter.
1902
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1903
+ : std::max (DAG.MinOccupancy , WavesPerEU.first );
1917
1904
1918
1905
// Accounts for a reduction in RP in an optimizable region. Returns whether we
1919
1906
// estimate that we have identified enough rematerialization opportunities to
1920
1907
// achieve our goal, and sets Progress to true when this particular reduction
1921
1908
// in pressure was helpful toward that goal.
1922
1909
auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
1910
+ const TargetRegisterClass *RC,
1923
1911
bool &Progress) -> bool {
1924
1912
ExcessRP &Excess = OptIt->getSecond ();
1925
- // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1926
- // only when we are just trying to eliminate spilling to memory. At this
1927
- // point we err on the conservative side and do not increase
1928
- // register-to-register spilling for the sake of increasing occupancy.
1929
- Progress |=
1930
- Excess.saveArchVGPRs (SIRegisterInfo::getNumCoveredRegs (Mask),
1931
- /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1913
+ unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs (Mask);
1914
+ if (SRI->isSGPRClass (RC)) {
1915
+ Progress |= Excess.saveSGPRs (NumRegs);
1916
+ } else if (SRI->isAGPRClass (RC)) {
1917
+ Progress |= Excess.saveAGPRs (NumRegs);
1918
+ } else {
1919
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1920
+ // only when we are just trying to eliminate spilling to memory. At this
1921
+ // point we err on the conservative side and do not increase
1922
+ // register-to-register spilling for the sake of increasing occupancy.
1923
+ Progress |=
1924
+ Excess.saveArchVGPRs (NumRegs,
1925
+ /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1926
+ }
1932
1927
if (!Excess)
1933
1928
OptRegions.erase (OptIt->getFirst ());
1934
1929
return OptRegions.empty ();
@@ -1950,10 +1945,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1950
1945
if (!isTriviallyReMaterializable (DefMI))
1951
1946
continue ;
1952
1947
1953
- // We only support rematerializing virtual VGPRs with one definition.
1948
+ // We only support rematerializing virtual registers with one definition.
1954
1949
Register Reg = DefMI.getOperand (0 ).getReg ();
1955
- if (!Reg.isVirtual () || !SRI->isVGPRClass (DAG.MRI .getRegClass (Reg)) ||
1956
- !DAG.MRI .hasOneDef (Reg))
1950
+ if (!Reg.isVirtual () || !DAG.MRI .hasOneDef (Reg))
1957
1951
continue ;
1958
1952
1959
1953
// We only care to rematerialize the instruction if it has a single
@@ -1991,14 +1985,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1991
1985
Rematerializations.try_emplace (&DefMI, UseMI).first ->second ;
1992
1986
1993
1987
bool RematUseful = false ;
1988
+ const TargetRegisterClass *RC = DAG.MRI .getRegClass (Reg);
1994
1989
if (auto It = OptRegions.find (I); It != OptRegions.end ()) {
1995
1990
// Optimistically consider that moving the instruction out of its
1996
1991
// defining region will reduce RP in the latter; this assumes that
1997
1992
// maximum RP in the region is reached somewhere between the defining
1998
1993
// instruction and the end of the region.
1999
1994
REMAT_DEBUG (dbgs () << " Defining region is optimizable\n " );
2000
1995
LaneBitmask Mask = DAG.RegionLiveOuts .getLiveRegsForRegionIdx (I)[Reg];
2001
- if (ReduceRPInRegion (It, Mask, RematUseful))
1996
+ if (ReduceRPInRegion (It, Mask, RC, RematUseful))
2002
1997
return true ;
2003
1998
}
2004
1999
@@ -2018,7 +2013,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
2018
2013
// instruction's use.
2019
2014
if (auto It = OptRegions.find (LIRegion); It != OptRegions.end ()) {
2020
2015
REMAT_DEBUG (dbgs () << " Live-in in region " << LIRegion << ' \n ' );
2021
- if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RematUseful))
2016
+ if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RC, RematUseful))
2022
2017
return true ;
2023
2018
}
2024
2019
}
0 commit comments