@@ -1692,6 +1692,8 @@ namespace {
1692
1692
// / Models excess register pressure in a region and tracks our progress as we
1693
1693
// / identify rematerialization opportunities.
1694
1694
struct ExcessRP {
1695
+ // / Number of excess SGPRs.
1696
+ unsigned SGPRs = 0 ;
1695
1697
// / Number of excess ArchVGPRs.
1696
1698
unsigned ArchVGPRs = 0 ;
1697
1699
// / Number of excess AGPRs.
@@ -1707,26 +1709,34 @@ struct ExcessRP {
1707
1709
bool UnifiedRF;
1708
1710
1709
1711
// / Constructs the excess RP model; determines the excess pressure w.r.t. a
1710
- // / maximum number of allowed VGPRs.
1711
- ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
1712
+ // / maximum number of allowed SGPRs/VGPRs.
1713
+ ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
1714
+ unsigned MaxVGPRs);
1715
+
1716
+ // / Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
1717
+ // / these SGPRs helped reduce excess pressure.
1718
+ bool saveSGPRs (unsigned NumRegs) { return saveRegs (SGPRs, NumRegs); }
1712
1719
1713
1720
// / Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
1714
1721
// / UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
1715
1722
// / AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
1716
1723
// / saving these ArchVGPRs helped reduce excess pressure.
1717
1724
bool saveArchVGPRs (unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
1718
1725
1719
- // / Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
1720
- // / these ArchVGPRs helped reduce excess pressure.
1721
- bool saveAGPRs (unsigned NumRegs);
1726
+ // / Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
1727
+ // / these AGPRs helped reduce excess pressure.
1728
+ bool saveAGPRs (unsigned NumRegs) {
1729
+ return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1730
+ }
1722
1731
1723
1732
// / Returns whether there is any excess register pressure.
1724
- operator bool () const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0 ; }
1733
+ operator bool () const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
1725
1734
1726
1735
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1727
1736
friend raw_ostream &operator <<(raw_ostream &OS, const ExcessRP &Excess) {
1728
- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
1729
- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
1737
+ OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
1738
+ << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
1739
+ << " VGPRs in total, next ArchVGPR aligment in "
1730
1740
<< Excess.ArchVGPRsToAlignment << " registers)\n " ;
1731
1741
return OS;
1732
1742
}
@@ -1743,12 +1753,17 @@ struct ExcessRP {
1743
1753
} // namespace
1744
1754
1745
1755
ExcessRP::ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP,
1746
- unsigned MaxVGPRs)
1756
+ unsigned MaxSGPRs, unsigned MaxVGPRs)
1747
1757
: UnifiedRF(ST.hasGFX90AInsts()) {
1758
+ // Compute excess SGPR pressure.
1759
+ unsigned NumSGPRs = RP.getSGPRNum ();
1760
+ if (NumSGPRs > MaxSGPRs)
1761
+ SGPRs = NumSGPRs - MaxSGPRs;
1762
+
1763
+ // Compute excess ArchVGPR/AGPR pressure.
1748
1764
unsigned NumArchVGPRs = RP.getArchVGPRNum ();
1749
1765
unsigned NumAGPRs = RP.getAGPRNum ();
1750
1766
HasAGPRs = NumAGPRs;
1751
-
1752
1767
if (!UnifiedRF) {
1753
1768
// Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
1754
1769
// independently.
@@ -1829,10 +1844,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
1829
1844
return Progress;
1830
1845
}
1831
1846
1832
- bool ExcessRP::saveAGPRs (unsigned NumRegs) {
1833
- return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1834
- }
1835
-
1836
1847
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
1837
1848
const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo *>(DAG.TRI );
1838
1849
@@ -1855,46 +1866,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1855
1866
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 );
1856
1867
IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
1857
1868
1858
- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
1859
- if (Cond) {
1860
- // We won't try to increase occupancy.
1861
- IncreaseOccupancy = false ;
1862
- OptRegions.clear ();
1863
- }
1864
- return Cond;
1865
- };
1866
-
1867
1869
// Collect optimizable regions. If there is spilling in any region we will
1868
- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
1869
- // occupancy by one in the whole function.
1870
+ // just try to reduce spilling. Otherwise we will try to increase occupancy by
1871
+ // one in the whole function.
1870
1872
for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1871
1873
GCNRegPressure &RP = DAG.Pressure [I];
1872
-
1873
- // Check whether SGPR pressures prevents us from eliminating spilling.
1874
- unsigned NumSGPRs = RP.getSGPRNum ();
1875
- if (NumSGPRs > MaxSGPRsNoSpill)
1876
- ClearOptRegionsIf (IncreaseOccupancy);
1877
-
1878
- ExcessRP Excess (ST, RP, MaxVGPRsNoSpill);
1879
- if (Excess) {
1880
- ClearOptRegionsIf (IncreaseOccupancy);
1874
+ ExcessRP Excess (ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
1875
+ if (Excess && IncreaseOccupancy) {
1876
+ // There is spilling in the region and we were so far trying to increase
1877
+ // occupancy. Stop trying that and focus on reducing spilling.
1878
+ IncreaseOccupancy = false ;
1879
+ OptRegions.clear ();
1881
1880
} else if (IncreaseOccupancy) {
1882
- // Check whether SGPR pressure prevents us from increasing occupancy.
1883
- if (ClearOptRegionsIf (NumSGPRs > MaxSGPRsIncOcc)) {
1884
- if (DAG.MinOccupancy >= WavesPerEU.first )
1885
- return false ;
1886
- continue ;
1887
- }
1888
- if ((Excess = ExcessRP (ST, RP, MaxVGPRsIncOcc))) {
1889
- // We can only rematerialize ArchVGPRs at this point.
1890
- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs ;
1891
- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum ();
1892
- if (ClearOptRegionsIf (Excess.AGPRs || NotEnoughArchVGPRs)) {
1893
- if (DAG.MinOccupancy >= WavesPerEU.first )
1894
- return false ;
1895
- continue ;
1896
- }
1897
- }
1881
+ Excess = ExcessRP (ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
1898
1882
}
1899
1883
if (Excess)
1900
1884
OptRegions.insert ({I, Excess});
@@ -1914,23 +1898,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1914
1898
#endif
1915
1899
1916
1900
// When we are reducing spilling, the target is the minimum target number of
1917
- // waves/EU determined by the subtarget.
1918
- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first ;
1901
+ // waves/EU determined by the subtarget. In cases where either one of
1902
+ // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" is set on the function, the current
1903
+ // minimum region occupancy may be higher than the latter.
1904
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1905
+ : std::max (DAG.MinOccupancy , WavesPerEU.first );
1919
1906
1920
1907
// Accounts for a reduction in RP in an optimizable region. Returns whether we
1921
1908
// estimate that we have identified enough rematerialization opportunities to
1922
1909
// achieve our goal, and sets Progress to true when this particular reduction
1923
1910
// in pressure was helpful toward that goal.
1924
1911
auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
1912
+ const TargetRegisterClass *RC,
1925
1913
bool &Progress) -> bool {
1926
1914
ExcessRP &Excess = OptIt->getSecond ();
1927
- // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1928
- // only when we are just trying to eliminate spilling to memory. At this
1929
- // point we err on the conservative side and do not increase
1930
- // register-to-register spilling for the sake of increasing occupancy.
1931
- Progress |=
1932
- Excess.saveArchVGPRs (SIRegisterInfo::getNumCoveredRegs (Mask),
1933
- /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1915
+ unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs (Mask);
1916
+ if (SRI->isSGPRClass (RC)) {
1917
+ Progress |= Excess.saveSGPRs (NumRegs);
1918
+ } else if (SRI->isAGPRClass (RC)) {
1919
+ Progress |= Excess.saveAGPRs (NumRegs);
1920
+ } else {
1921
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1922
+ // only when we are just trying to eliminate spilling to memory. At this
1923
+ // point we err on the conservative side and do not increase
1924
+ // register-to-register spilling for the sake of increasing occupancy.
1925
+ Progress |=
1926
+ Excess.saveArchVGPRs (NumRegs,
1927
+ /* UseArchVGPRForAGPRSpill=*/ !IncreaseOccupancy);
1928
+ }
1934
1929
if (!Excess)
1935
1930
OptRegions.erase (OptIt->getFirst ());
1936
1931
return OptRegions.empty ();
@@ -1952,10 +1947,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1952
1947
if (!isTriviallyReMaterializable (DefMI))
1953
1948
continue ;
1954
1949
1955
- // We only support rematerializing virtual VGPRs with one definition.
1950
+ // We only support rematerializing virtual registers with one definition.
1956
1951
Register Reg = DefMI.getOperand (0 ).getReg ();
1957
- if (!Reg.isVirtual () || !SRI->isVGPRClass (DAG.MRI .getRegClass (Reg)) ||
1958
- !DAG.MRI .hasOneDef (Reg))
1952
+ if (!Reg.isVirtual () || !DAG.MRI .hasOneDef (Reg))
1959
1953
continue ;
1960
1954
1961
1955
// We only care to rematerialize the instruction if it has a single
@@ -1993,14 +1987,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1993
1987
Rematerializations.try_emplace (&DefMI, UseMI).first ->second ;
1994
1988
1995
1989
bool RematUseful = false ;
1990
+ const TargetRegisterClass *RC = DAG.MRI .getRegClass (Reg);
1996
1991
if (auto It = OptRegions.find (I); It != OptRegions.end ()) {
1997
1992
// Optimistically consider that moving the instruction out of its
1998
1993
// defining region will reduce RP in the latter; this assumes that
1999
1994
// maximum RP in the region is reached somewhere between the defining
2000
1995
// instruction and the end of the region.
2001
1996
REMAT_DEBUG (dbgs () << " Defining region is optimizable\n " );
2002
1997
LaneBitmask Mask = DAG.RegionLiveOuts .getLiveRegsForRegionIdx (I)[Reg];
2003
- if (ReduceRPInRegion (It, Mask, RematUseful))
1998
+ if (ReduceRPInRegion (It, Mask, RC, RematUseful))
2004
1999
return true ;
2005
2000
}
2006
2001
@@ -2020,7 +2015,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
2020
2015
// instruction's use.
2021
2016
if (auto It = OptRegions.find (LIRegion); It != OptRegions.end ()) {
2022
2017
REMAT_DEBUG (dbgs () << " Live-in in region " << LIRegion << ' \n ' );
2023
- if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RematUseful))
2018
+ if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RC, RematUseful))
2024
2019
return true ;
2025
2020
}
2026
2021
}
0 commit comments