Skip to content

Commit 299737a

Browse files
committed
Add support for rematerializing SGPRs and AGPRs
1 parent 8584b21 commit 299737a

File tree

4 files changed

+644
-321
lines changed

4 files changed

+644
-321
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 59 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,8 @@ namespace {
16901690
/// Models excess register pressure in a region and tracks our progress as we
16911691
/// identify rematerialization opportunities.
16921692
struct ExcessRP {
1693+
/// Number of excess SGPRs.
1694+
unsigned SGPRs = 0;
16931695
/// Number of excess ArchVGPRs.
16941696
unsigned ArchVGPRs = 0;
16951697
/// Number of excess AGPRs.
@@ -1705,26 +1707,34 @@ struct ExcessRP {
17051707
bool UnifiedRF;
17061708

17071709
/// Constructs the excess RP model; determines the excess pressure w.r.t. a
1708-
/// maximum number of allowed VGPRs.
1709-
ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
1710+
/// maximum number of allowed SGPRs/VGPRs.
1711+
ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
1712+
unsigned MaxVGPRs);
1713+
1714+
/// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
1715+
/// these SGPRs helped reduce excess pressure.
1716+
bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
17101717

17111718
/// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
17121719
/// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
17131720
/// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
17141721
/// saving these ArchVGPRs helped reduce excess pressure.
17151722
bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
17161723

1717-
/// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
1718-
/// these ArchVGPRs helped reduce excess pressure.
1719-
bool saveAGPRs(unsigned NumRegs);
1724+
/// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
1725+
/// these AGPRs helped reduce excess pressure.
1726+
bool saveAGPRs(unsigned NumRegs) {
1727+
return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
1728+
}
17201729

17211730
/// Returns whether there is any excess register pressure.
1722-
operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
1731+
operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
17231732

17241733
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
17251734
friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
1726-
OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
1727-
<< Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
1735+
OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
1736+
<< Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
1737+
<< " VGPRs in total, next ArchVGPR alignment in "
17281738
<< Excess.ArchVGPRsToAlignment << " registers)\n";
17291739
return OS;
17301740
}
@@ -1741,12 +1751,17 @@ struct ExcessRP {
17411751
} // namespace
17421752

17431753
ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
1744-
unsigned MaxVGPRs)
1754+
unsigned MaxSGPRs, unsigned MaxVGPRs)
17451755
: UnifiedRF(ST.hasGFX90AInsts()) {
1756+
// Compute excess SGPR pressure.
1757+
unsigned NumSGPRs = RP.getSGPRNum();
1758+
if (NumSGPRs > MaxSGPRs)
1759+
SGPRs = NumSGPRs - MaxSGPRs;
1760+
1761+
// Compute excess ArchVGPR/AGPR pressure.
17461762
unsigned NumArchVGPRs = RP.getArchVGPRNum();
17471763
unsigned NumAGPRs = RP.getAGPRNum();
17481764
HasAGPRs = NumAGPRs;
1749-
17501765
if (!UnifiedRF) {
17511766
// Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
17521767
// independently.
@@ -1827,10 +1842,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
18271842
return Progress;
18281843
}
18291844

1830-
bool ExcessRP::saveAGPRs(unsigned NumRegs) {
1831-
return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
1832-
}
1833-
18341845
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18351846
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
18361847

@@ -1853,46 +1864,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18531864
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
18541865
IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
18551866

1856-
auto ClearOptRegionsIf = [&](bool Cond) -> bool {
1857-
if (Cond) {
1858-
// We won't try to increase occupancy.
1859-
IncreaseOccupancy = false;
1860-
OptRegions.clear();
1861-
}
1862-
return Cond;
1863-
};
1864-
18651867
// Collect optimizable regions. If there is spilling in any region we will
1866-
// just try to reduce ArchVGPR spilling. Otherwise we will try to increase
1867-
// occupancy by one in the whole function.
1868+
// just try to reduce spilling. Otherwise we will try to increase occupancy by
1869+
// one in the whole function.
18681870
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
18691871
GCNRegPressure &RP = DAG.Pressure[I];
1870-
1871-
// Check whether SGPR pressures prevents us from eliminating spilling.
1872-
unsigned NumSGPRs = RP.getSGPRNum();
1873-
if (NumSGPRs > MaxSGPRsNoSpill)
1874-
ClearOptRegionsIf(IncreaseOccupancy);
1875-
1876-
ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
1877-
if (Excess) {
1878-
ClearOptRegionsIf(IncreaseOccupancy);
1872+
ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
1873+
if (Excess && IncreaseOccupancy) {
1874+
// There is spilling in the region and we were so far trying to increase
1875+
// occupancy. Stop trying that and focus on reducing spilling.
1876+
IncreaseOccupancy = false;
1877+
OptRegions.clear();
18791878
} else if (IncreaseOccupancy) {
1880-
// Check whether SGPR pressure prevents us from increasing occupancy.
1881-
if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
1882-
if (DAG.MinOccupancy >= WavesPerEU.first)
1883-
return false;
1884-
continue;
1885-
}
1886-
if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
1887-
// We can only rematerialize ArchVGPRs at this point.
1888-
unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
1889-
bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
1890-
if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
1891-
if (DAG.MinOccupancy >= WavesPerEU.first)
1892-
return false;
1893-
continue;
1894-
}
1895-
}
1879+
Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
18961880
}
18971881
if (Excess)
18981882
OptRegions.insert({I, Excess});
@@ -1912,23 +1896,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19121896
#endif
19131897

19141898
// When we are reducing spilling, the target is the minimum target number of
1915-
// waves/EU determined by the subtarget.
1916-
TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first;
1899+
// waves/EU determined by the subtarget. In cases where either one of
1900+
// "amdgpu-num-sgpr" or "amdgpu-num-vgpr" are set on the function, the current
1901+
// minimum region occupancy may be higher than the latter.
1902+
TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1903+
: std::max(DAG.MinOccupancy, WavesPerEU.first);
19171904

19181905
// Accounts for a reduction in RP in an optimizable region. Returns whether we
19191906
// estimate that we have identified enough rematerialization opportunities to
19201907
// achieve our goal, and sets Progress to true when this particular reduction
19211908
// in pressure was helpful toward that goal.
19221909
auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
1910+
const TargetRegisterClass *RC,
19231911
bool &Progress) -> bool {
19241912
ExcessRP &Excess = OptIt->getSecond();
1925-
// We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1926-
// only when we are just trying to eliminate spilling to memory. At this
1927-
// point we err on the conservative side and do not increase
1928-
// register-to-register spilling for the sake of increasing occupancy.
1929-
Progress |=
1930-
Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
1931-
/*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
1913+
unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
1914+
if (SRI->isSGPRClass(RC)) {
1915+
Progress |= Excess.saveSGPRs(NumRegs);
1916+
} else if (SRI->isAGPRClass(RC)) {
1917+
Progress |= Excess.saveAGPRs(NumRegs);
1918+
} else {
1919+
// We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1920+
// only when we are just trying to eliminate spilling to memory. At this
1921+
// point we err on the conservative side and do not increase
1922+
// register-to-register spilling for the sake of increasing occupancy.
1923+
Progress |=
1924+
Excess.saveArchVGPRs(NumRegs,
1925+
/*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
1926+
}
19321927
if (!Excess)
19331928
OptRegions.erase(OptIt->getFirst());
19341929
return OptRegions.empty();
@@ -1950,10 +1945,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19501945
if (!isTriviallyReMaterializable(DefMI))
19511946
continue;
19521947

1953-
// We only support rematerializing virtual VGPRs with one definition.
1948+
// We only support rematerializing virtual registers with one definition.
19541949
Register Reg = DefMI.getOperand(0).getReg();
1955-
if (!Reg.isVirtual() || !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1956-
!DAG.MRI.hasOneDef(Reg))
1950+
if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
19571951
continue;
19581952

19591953
// We only care to rematerialize the instruction if it has a single
@@ -1991,14 +1985,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19911985
Rematerializations.try_emplace(&DefMI, UseMI).first->second;
19921986

19931987
bool RematUseful = false;
1988+
const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
19941989
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
19951990
// Optimistically consider that moving the instruction out of its
19961991
// defining region will reduce RP in the latter; this assumes that
19971992
// maximum RP in the region is reached somewhere between the defining
19981993
// instruction and the end of the region.
19991994
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
20001995
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
2001-
if (ReduceRPInRegion(It, Mask, RematUseful))
1996+
if (ReduceRPInRegion(It, Mask, RC, RematUseful))
20021997
return true;
20031998
}
20041999

@@ -2018,7 +2013,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
20182013
// instruction's use.
20192014
if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
20202015
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
2021-
if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
2016+
if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
20222017
return true;
20232018
}
20242019
}

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,8 +440,6 @@ class ClusteredLowOccStage : public GCNSchedStage {
440440
/// estimates reducing spilling or increasing occupancy is possible, as few
441441
/// instructions as possible are rematerialized to reduce potential negative
442442
/// effects on function latency.
443-
///
444-
/// TODO: We should extend this to work on SGPRs and AGPRs as well.
445443
class PreRARematStage : public GCNSchedStage {
446444
private:
447445
/// Useful information about a rematerializable instruction.

0 commit comments

Comments
 (0)