@@ -1703,11 +1703,13 @@ namespace {
 /// Models excess register pressure in a region and tracks our progress as we
 /// identify rematerialization opportunities.
 struct ExcessRP {
+  /// Number of excess SGPRs.
+  unsigned SGPRs = 0;
   /// Number of excess ArchVGPRs.
   unsigned ArchVGPRs = 0;
   /// Number of excess AGPRs.
   unsigned AGPRs = 0;
-  /// For unified register files, number of excess VGPRs.
+  /// For unified register files, number of excess VGPRs. 0 otherwise.
   unsigned VGPRs = 0;
   /// For unified register files with AGPR usage, number of excess ArchVGPRs to
   /// save before we are able to save a whole allocation granule.
@@ -1716,28 +1718,37 @@ struct ExcessRP {
   bool HasAGPRs = false;
   /// Whether the subtarget has a unified RF.
   bool UnifiedRF;
+  /// Whether we consider that the register allocator will be able to swap
+  /// between ArchVGPRs and AGPRs by copying them to a super register class.
+  /// Concretely, this allows savings of one kind of VGPR to help toward saving
+  /// the other kind of VGPR.
+  bool CombineVGPRSavings;
 
   /// Constructs the excess RP model; determines the excess pressure w.r.t. a
-  /// maximum number of allowed VGPRs.
-  ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+  /// maximum number of allowed SGPRs/VGPRs.
+  ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
+           unsigned MaxVGPRs, bool CombineVGPRSavings);
 
-  /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
-  /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
-  /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
-  /// saving these ArchVGPRs helped reduce excess pressure.
-  bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
+  /// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
+  /// these SGPRs helped reduce excess pressure.
+  bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
 
-  /// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
-  /// these ArchVGPRs helped reduce excess pressure.
+  /// Accounts for \p NumRegs saved ArchVGPRs in the model. Returns whether
+  /// saving these ArchVGPRs helped reduce excess pressure.
+  bool saveArchVGPRs(unsigned NumRegs);
+
+  /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
+  /// these AGPRs helped reduce excess pressure.
   bool saveAGPRs(unsigned NumRegs);
 
   /// Returns whether there is any excess register pressure.
-  operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
+  operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
-    OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
-       << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
+    OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
+       << Excess.AGPRs << " AGPRs (" << Excess.VGPRs
+       << " VGPRs in total, next ArchVGPR alignment in "
       << Excess.ArchVGPRsToAlignment << " registers)\n";
    return OS;
  }
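
The new saveSGPRs wrapper delegates to a private saveRegs helper that this patch does not touch and which is therefore not shown in any hunk. Judging from how the callers below use it (NumRegs is re-tested right after the call in saveArchVGPRs), it presumably deducts savings from an excess counter in place and leaves the remainder in NumRegs. A minimal standalone sketch of that assumed behavior, with a hypothetical name and example values, not the actual member:

#include <algorithm>
#include <cassert>

// Hypothetical stand-in for ExcessRP::saveRegs (not shown in this diff):
// deduct as many saved registers as possible from an excess counter and
// report whether any excess was actually removed. Leftover savings remain
// in NumRegs so callers can apply them to other register categories.
static bool saveRegsSketch(unsigned &Excess, unsigned &NumRegs) {
  unsigned NumSaved = std::min(Excess, NumRegs);
  Excess -= NumSaved;
  NumRegs -= NumSaved;
  return NumSaved != 0;
}

int main() {
  unsigned ExcessSGPRs = 3, Saved = 5;
  bool Progress = saveRegsSketch(ExcessSGPRs, Saved);
  assert(Progress && ExcessSGPRs == 0 && Saved == 2);
  return 0;
}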
@@ -1754,12 +1765,18 @@ struct ExcessRP {
 } // namespace
 
 ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
-                   unsigned MaxVGPRs)
-    : UnifiedRF(ST.hasGFX90AInsts()) {
+                   unsigned MaxSGPRs, unsigned MaxVGPRs,
+                   bool CombineVGPRSavings)
+    : UnifiedRF(ST.hasGFX90AInsts()), CombineVGPRSavings(CombineVGPRSavings) {
+  // Compute excess SGPR pressure.
+  unsigned NumSGPRs = RP.getSGPRNum();
+  if (NumSGPRs > MaxSGPRs)
+    SGPRs = NumSGPRs - MaxSGPRs;
+
+  // Compute excess ArchVGPR/AGPR pressure.
   unsigned NumArchVGPRs = RP.getArchVGPRNum();
   unsigned NumAGPRs = RP.getAGPRNum();
   HasAGPRs = NumAGPRs;
-
   if (!UnifiedRF) {
     // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
     // independently.
@@ -1795,15 +1812,15 @@ ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
   }
 }
 
-bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
+bool ExcessRP::saveArchVGPRs(unsigned NumRegs) {
   bool Progress = saveRegs(ArchVGPRs, NumRegs);
   if (!NumRegs)
     return Progress;
 
   if (!UnifiedRF) {
-    if (UseArchVGPRForAGPRSpill)
+    if (CombineVGPRSavings)
       Progress |= saveRegs(AGPRs, NumRegs);
-  } else if (HasAGPRs && (VGPRs || (UseArchVGPRForAGPRSpill && AGPRs))) {
+  } else if (HasAGPRs && (VGPRs || (CombineVGPRSavings && AGPRs))) {
     // There is progress as long as there are VGPRs left to save, even if the
     // save induced by this particular call does not cross an ArchVGPR alignment
     // barrier.
@@ -1827,21 +1844,25 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
       ArchVGPRsToAlignment -= NumRegs;
     }
 
-    // Prioritize saving generic VGPRs, then AGPRs if we allow AGPR-to-ArchVGPR
-    // spilling and have some free ArchVGPR slots.
+    // Prioritize saving generic VGPRs, then AGPRs if we consider that the
+    // register allocator will be able to replace an AGPR with an ArchVGPR.
     saveRegs(VGPRs, NumSavedRegs);
-    if (UseArchVGPRForAGPRSpill)
+    if (CombineVGPRSavings)
       saveRegs(AGPRs, NumSavedRegs);
   } else {
     // No AGPR usage in the region i.e., no allocation granule to worry about.
     Progress |= saveRegs(VGPRs, NumRegs);
   }
-
   return Progress;
 }
 
 bool ExcessRP::saveAGPRs(unsigned NumRegs) {
-  return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
+  bool Progress = saveRegs(AGPRs, NumRegs);
+  if (UnifiedRF)
+    Progress |= saveRegs(VGPRs, NumRegs);
+  if (CombineVGPRSavings)
+    Progress |= saveRegs(ArchVGPRs, NumRegs);
+  return Progress;
 }
 
 bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
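
To make the combined-savings accounting above concrete, here is a small self-contained illustration. It is not the real ExcessRP class: it keeps only the non-unified-RF path, and the names and numbers are purely illustrative. With CombineVGPRSavings set, saving more ArchVGPRs than the ArchVGPR excess lets the leftover count against the AGPR excess as well.

#include <algorithm>
#include <cassert>

// Simplified, non-unified-RF-only sketch of the CombineVGPRSavings behavior
// shown in the hunks above; names and behavior are illustrative only.
struct TinyExcess {
  unsigned ArchVGPRs, AGPRs;
  bool CombineVGPRSavings;

  static bool save(unsigned &Excess, unsigned &NumRegs) {
    unsigned N = std::min(Excess, NumRegs);
    Excess -= N;
    NumRegs -= N;
    return N != 0;
  }

  bool saveArchVGPRs(unsigned NumRegs) {
    bool Progress = save(ArchVGPRs, NumRegs);
    if (CombineVGPRSavings) // Leftover savings may help the AGPR side too.
      Progress |= save(AGPRs, NumRegs);
    return Progress;
  }
};

int main() {
  // 2 excess ArchVGPRs and 3 excess AGPRs; saving 4 ArchVGPRs clears the
  // ArchVGPR excess and, with combined savings, removes 2 of the 3 AGPRs.
  TinyExcess E{/*ArchVGPRs=*/2, /*AGPRs=*/3, /*CombineVGPRSavings=*/true};
  bool Progress = E.saveArchVGPRs(4);
  assert(Progress && E.ArchVGPRs == 0 && E.AGPRs == 1);
  return 0;
}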
@@ -1869,46 +1890,28 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
       ST.getMaxNumVGPRs(DAG.MinOccupancy + 1, DynamicVGPRBlockSize);
   IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
 
-  auto ClearOptRegionsIf = [&](bool Cond) -> bool {
-    if (Cond) {
-      // We won't try to increase occupancy.
-      IncreaseOccupancy = false;
-      OptRegions.clear();
-    }
-    return Cond;
-  };
-
   // Collect optimizable regions. If there is spilling in any region we will
-  // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
-  // occupancy by one in the whole function.
+  // just try to reduce spilling. Otherwise we will try to increase occupancy by
+  // one in the whole function.
   for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
     GCNRegPressure &RP = DAG.Pressure[I];
-
-    // Check whether SGPR pressures prevents us from eliminating spilling.
-    unsigned NumSGPRs = RP.getSGPRNum();
-    if (NumSGPRs > MaxSGPRsNoSpill)
-      ClearOptRegionsIf(IncreaseOccupancy);
-
-    ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
-    if (Excess) {
-      ClearOptRegionsIf(IncreaseOccupancy);
+    // We allow ArchVGPR or AGPR savings to count as savings of the other kind
+    // of VGPR only when trying to eliminate spilling. We cannot do this when
+    // trying to increase occupancy since VGPR class swaps only occur later in
+    // the register allocator i.e., the scheduler will not be able to reason
+    // about these savings and will not report an increase in the achievable
+    // occupancy, triggering rollbacks.
+    ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill,
+                    /*CombineVGPRSavings=*/true);
+    if (Excess && IncreaseOccupancy) {
+      // There is spilling in the region and we were so far trying to increase
+      // occupancy. Stop trying that and focus on reducing spilling.
+      IncreaseOccupancy = false;
+      OptRegions.clear();
     } else if (IncreaseOccupancy) {
-      // Check whether SGPR pressure prevents us from increasing occupancy.
-      if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
-        if (DAG.MinOccupancy >= WavesPerEU.first)
-          return false;
-        continue;
-      }
-      if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
-        // We can only rematerialize ArchVGPRs at this point.
-        unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
-        bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
-        if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
-          if (DAG.MinOccupancy >= WavesPerEU.first)
-            return false;
-          continue;
-        }
-      }
+      // There is no spilling in the region, try to increase occupancy.
+      Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc,
+                        /*CombineVGPRSavings=*/false);
     }
     if (Excess)
       OptRegions.insert({I, Excess});
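
The loop above implements a two-phase policy: keep targeting one extra wave of occupancy until some region is found to spill, then clear what was collected so far and restart with the weaker goal of eliminating spilling. A compact standalone walkthrough of that policy, reduced to a single VGPR count per region, with made-up pressures and limits that do not come from the patch:

#include <cstdio>
#include <vector>

// Illustrative walk-through of the two-phase policy, using example numbers.
int main() {
  const unsigned MaxVGPRsNoSpill = 256; // addressable limit (example value)
  const unsigned MaxVGPRsIncOcc = 128;  // limit for one extra wave (example)
  std::vector<unsigned> RegionVGPRPressure = {120, 140, 300};

  bool IncreaseOccupancy = true;
  std::vector<unsigned> OptRegions; // indices of regions worth optimizing
  for (unsigned I = 0; I < RegionVGPRPressure.size(); ++I) {
    unsigned RP = RegionVGPRPressure[I];
    if (RP > MaxVGPRsNoSpill && IncreaseOccupancy) {
      // Spilling somewhere: stop targeting higher occupancy and restart the
      // collection with the weaker goal of eliminating spilling.
      IncreaseOccupancy = false;
      OptRegions.clear();
    }
    unsigned Limit = IncreaseOccupancy ? MaxVGPRsIncOcc : MaxVGPRsNoSpill;
    if (RP > Limit)
      OptRegions.push_back(I);
  }
  std::printf("goal: %s, %zu optimizable region(s)\n",
              IncreaseOccupancy ? "+1 occupancy" : "reduce spilling",
              OptRegions.size());
  return 0;
}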
@@ -1928,23 +1931,27 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
 #endif
 
   // When we are reducing spilling, the target is the minimum target number of
-  // waves/EU determined by the subtarget.
-  TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first;
+  // waves/EU determined by the subtarget. In cases where either one of
+  // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" is set on the function, the current
+  // minimum region occupancy may be higher than the latter.
+  TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
+                                : std::max(DAG.MinOccupancy, WavesPerEU.first);
 
   // Accounts for a reduction in RP in an optimizable region. Returns whether we
   // estimate that we have identified enough rematerialization opportunities to
   // achieve our goal, and sets Progress to true when this particular reduction
   // in pressure was helpful toward that goal.
   auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
+                              const TargetRegisterClass *RC,
                               bool &Progress) -> bool {
     ExcessRP &Excess = OptIt->getSecond();
-    // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
-    // only when we are just trying to eliminate spilling to memory. At this
-    // point we err on the conservative side and do not increase
-    // register-to-register spilling for the sake of increasing occupancy.
-    Progress |=
-        Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
-                             /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+    unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
+    if (SRI->isSGPRClass(RC))
+      Progress |= Excess.saveSGPRs(NumRegs);
+    else if (SRI->isAGPRClass(RC))
+      Progress |= Excess.saveAGPRs(NumRegs);
+    else
+      Progress |= Excess.saveArchVGPRs(NumRegs);
     if (!Excess)
       OptRegions.erase(OptIt->getFirst());
     return OptRegions.empty();
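
The std::max in the new TargetOcc computation matters when per-function register limits already force every region to a higher occupancy than the subtarget's lower bound: the spilling-reduction target should not regress below what is already achieved. A tiny self-contained illustration with made-up values (the numbers are assumptions, not taken from the patch):

#include <algorithm>
#include <cassert>

int main() {
  unsigned MinOccupancy = 4;      // current minimum across regions (example)
  unsigned WavesPerEUFirst = 2;   // subtarget/function lower bound (example)
  bool IncreaseOccupancy = false; // we are only trying to reduce spilling

  unsigned TargetOcc = IncreaseOccupancy
                           ? MinOccupancy + 1
                           : std::max(MinOccupancy, WavesPerEUFirst);
  assert(TargetOcc == 4); // the old formula would have yielded 2
  return 0;
}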
@@ -1966,10 +1973,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
     if (!isTriviallyReMaterializable(DefMI))
       continue;
 
-    // We only support rematerializing virtual VGPRs with one definition.
+    // We only support rematerializing virtual registers with one definition.
     Register Reg = DefMI.getOperand(0).getReg();
-    if (!Reg.isVirtual() || !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
-        !DAG.MRI.hasOneDef(Reg))
+    if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
       continue;
 
     // We only care to rematerialize the instruction if it has a single
@@ -2007,14 +2013,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
         Rematerializations.try_emplace(&DefMI, UseMI).first->second;
 
     bool RematUseful = false;
+    const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
     if (auto It = OptRegions.find(I); It != OptRegions.end()) {
       // Optimistically consider that moving the instruction out of its
       // defining region will reduce RP in the latter; this assumes that
       // maximum RP in the region is reached somewhere between the defining
       // instruction and the end of the region.
       REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
       LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
-      if (ReduceRPInRegion(It, Mask, RematUseful))
+      if (ReduceRPInRegion(It, Mask, RC, RematUseful))
         return true;
     }
 
@@ -2034,7 +2041,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
       // instruction's use.
       if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
         REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
-        if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
+        if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
          return true;
      }
    }
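
To summarize the new bookkeeping end to end: each optimizable region keeps per-class excess counters, a rematerialization candidate's savings are routed to the counter matching its register class, and a region is dropped once all of its excess is gone. The toy program below mirrors ReduceRPInRegion only in spirit; the RegKind enum, names, and numbers are illustrative stand-ins for the SRI->is*Class(RC) checks and the real data structures.

#include <algorithm>
#include <cassert>
#include <map>

enum class RegKind { SGPR, ArchVGPR, AGPR };

// Toy per-region excess model; not the actual ExcessRP class.
struct ToyExcess {
  unsigned SGPRs = 0, ArchVGPRs = 0, AGPRs = 0;
  explicit operator bool() const { return SGPRs || ArchVGPRs || AGPRs; }
  void save(RegKind Kind, unsigned NumRegs) {
    unsigned &Counter = Kind == RegKind::SGPR       ? SGPRs
                        : Kind == RegKind::ArchVGPR ? ArchVGPRs
                                                    : AGPRs;
    Counter -= std::min(Counter, NumRegs);
  }
};

int main() {
  std::map<unsigned, ToyExcess> OptRegions;
  OptRegions[0] = {/*SGPRs=*/2, /*ArchVGPRs=*/1, /*AGPRs=*/0};

  // A rematerializable SGPR pair and a single ArchVGPR live through region 0.
  OptRegions[0].save(RegKind::SGPR, 2);
  OptRegions[0].save(RegKind::ArchVGPR, 1);
  if (!OptRegions[0])
    OptRegions.erase(0); // Region 0 no longer has excess pressure.

  assert(OptRegions.empty()); // Goal reached: every region was fixed.
  return 0;
}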