
Commit 10c3ed6

Add support for rematerializing SGPRs and AGPRs
1 parent 7866c40 · commit 10c3ed6

4 files changed: +644, -321 lines


llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 59 additions & 64 deletions
@@ -1702,6 +1702,8 @@ namespace {
 /// Models excess register pressure in a region and tracks our progress as we
 /// identify rematerialization opportunities.
 struct ExcessRP {
+  /// Number of excess SGPRs.
+  unsigned SGPRs = 0;
   /// Number of excess ArchVGPRs.
   unsigned ArchVGPRs = 0;
   /// Number of excess AGPRs.
@@ -1717,26 +1719,34 @@ struct ExcessRP {
   bool UnifiedRF;
 
   /// Constructs the excess RP model; determines the excess pressure w.r.t. a
-  /// maximum number of allowed VGPRs.
-  ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+  /// maximum number of allowed SGPRs/VGPRs.
+  ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
+           unsigned MaxVGPRs);
+
+  /// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
+  /// these SGPRs helped reduce excess pressure.
+  bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
 
   /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
   /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
   /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
   /// saving these ArchVGPRs helped reduce excess pressure.
   bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
 
-  /// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
-  /// these ArchVGPRs helped reduce excess pressure.
-  bool saveAGPRs(unsigned NumRegs);
+  /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
+  /// these AGPRs helped reduce excess pressure.
+  bool saveAGPRs(unsigned NumRegs) {
+    return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
+  }
 
   /// Returns whether there is any excess register pressure.
-  operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
+  operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
-    OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
-       << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
+    OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
+       << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
+       << " VGPRs in total, next ArchVGPR aligment in "
        << Excess.ArchVGPRsToAlignment << " registers)\n";
     return OS;
   }
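Both new inline members above delegate to a saveRegs helper that lives in an unchanged part of the struct and is therefore not visible in this hunk. The following is a minimal sketch of what such a helper plausibly looks like, under the assumption that it simply clamps the saved count against the remaining excess and reports whether any excess was eliminated; the name matches the calls above, but the body is a guess, not code from this commit:

#include <algorithm>

// Hypothetical stand-in for the saveRegs helper referenced by saveSGPRs and
// saveAGPRs above; illustration only.
static bool saveRegs(unsigned &LeftToSave, unsigned NumRegs) {
  unsigned NumSaved = std::min(LeftToSave, NumRegs); // cannot save more than the excess
  LeftToSave -= NumSaved;                            // shrink the modeled excess
  return NumSaved != 0;                              // progress only if some excess went away
}

With such a helper, saving 4 SGPRs against a model holding 3 excess SGPRs would clear the SGPR term and report progress, while saving SGPRs against a model with no SGPR excess would not.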
@@ -1753,12 +1763,17 @@ struct ExcessRP {
 } // namespace
 
 ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
-                   unsigned MaxVGPRs)
+                   unsigned MaxSGPRs, unsigned MaxVGPRs)
     : UnifiedRF(ST.hasGFX90AInsts()) {
+  // Compute excess SGPR pressure.
+  unsigned NumSGPRs = RP.getSGPRNum();
+  if (NumSGPRs > MaxSGPRs)
+    SGPRs = NumSGPRs - MaxSGPRs;
+
+  // Compute excess ArchVGPR/AGPR pressure.
   unsigned NumArchVGPRs = RP.getArchVGPRNum();
   unsigned NumAGPRs = RP.getAGPRNum();
   HasAGPRs = NumAGPRs;
-
   if (!UnifiedRF) {
     // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
     // independently.
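For concreteness, a tiny worked example of the new SGPR term computed by the constructor, with made-up numbers that are not taken from the commit:

// Illustration only: hypothetical region pressure and limit.
unsigned NumSGPRs = 110, MaxSGPRs = 106;
unsigned SGPRs = NumSGPRs > MaxSGPRs ? NumSGPRs - MaxSGPRs : 0; // 4 excess SGPRs

Saving at least 4 SGPRs through rematerialization would then clear this term of the model.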
@@ -1839,10 +1854,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
   return Progress;
 }
 
-bool ExcessRP::saveAGPRs(unsigned NumRegs) {
-  return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
-}
-
 bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);

@@ -1865,46 +1876,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
   const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
   IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
 
-  auto ClearOptRegionsIf = [&](bool Cond) -> bool {
-    if (Cond) {
-      // We won't try to increase occupancy.
-      IncreaseOccupancy = false;
-      OptRegions.clear();
-    }
-    return Cond;
-  };
-
   // Collect optimizable regions. If there is spilling in any region we will
-  // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
-  // occupancy by one in the whole function.
+  // just try to reduce spilling. Otherwise we will try to increase occupancy by
+  // one in the whole function.
   for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
     GCNRegPressure &RP = DAG.Pressure[I];
-
-    // Check whether SGPR pressures prevents us from eliminating spilling.
-    unsigned NumSGPRs = RP.getSGPRNum();
-    if (NumSGPRs > MaxSGPRsNoSpill)
-      ClearOptRegionsIf(IncreaseOccupancy);
-
-    ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
-    if (Excess) {
-      ClearOptRegionsIf(IncreaseOccupancy);
+    ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
+    if (Excess && IncreaseOccupancy) {
+      // There is spilling in the region and we were so far trying to increase
+      // occupancy. Stop trying that and focus on reducing spilling.
+      IncreaseOccupancy = false;
+      OptRegions.clear();
     } else if (IncreaseOccupancy) {
-      // Check whether SGPR pressure prevents us from increasing occupancy.
-      if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
-        if (DAG.MinOccupancy >= WavesPerEU.first)
-          return false;
-        continue;
-      }
-      if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
-        // We can only rematerialize ArchVGPRs at this point.
-        unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
-        bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
-        if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
-          if (DAG.MinOccupancy >= WavesPerEU.first)
-            return false;
-          continue;
-        }
-      }
+      Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
     }
     if (Excess)
       OptRegions.insert({I, Excess});
@@ -1924,23 +1908,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
 #endif
 
   // When we are reducing spilling, the target is the minimum target number of
-  // waves/EU determined by the subtarget.
-  TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first;
+  // waves/EU determined by the subtarget. In cases where either one of
+  // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" are set on the function, the current
+  // minimum region occupancy may be higher than the latter.
+  TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
+                                : std::max(DAG.MinOccupancy, WavesPerEU.first);
 
   // Accounts for a reduction in RP in an optimizable region. Returns whether we
   // estimate that we have identified enough rematerialization opportunities to
   // achieve our goal, and sets Progress to true when this particular reduction
   // in pressure was helpful toward that goal.
   auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
+                              const TargetRegisterClass *RC,
                               bool &Progress) -> bool {
     ExcessRP &Excess = OptIt->getSecond();
-    // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
-    // only when we are just trying to eliminate spilling to memory. At this
-    // point we err on the conservative side and do not increase
-    // register-to-register spilling for the sake of increasing occupancy.
-    Progress |=
-        Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
-                             /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+    unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
+    if (SRI->isSGPRClass(RC)) {
+      Progress |= Excess.saveSGPRs(NumRegs);
+    } else if (SRI->isAGPRClass(RC)) {
+      Progress |= Excess.saveAGPRs(NumRegs);
+    } else {
+      // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
+      // only when we are just trying to eliminate spilling to memory. At this
+      // point we err on the conservative side and do not increase
+      // register-to-register spilling for the sake of increasing occupancy.
+      Progress |=
+          Excess.saveArchVGPRs(NumRegs,
+                               /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+    }
     if (!Excess)
       OptRegions.erase(OptIt->getFirst());
     return OptRegions.empty();
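The rewritten ReduceRPInRegion routes each saved register to the counter matching its register class. The self-contained sketch below mirrors that dispatch outside the scheduler, with a simplified stand-in for ExcessRP and hypothetical excess counts; it deliberately omits the unified-register-file and ArchVGPR-for-AGPR-spill handling that the real saveArchVGPRs performs, and none of its names or numbers come from the commit:

#include <algorithm>

// Simplified stand-in for ExcessRP, illustration only.
struct ExcessModel {
  unsigned SGPRs = 4, ArchVGPRs = 2, AGPRs = 0; // hypothetical excess counts

  // Clamp the saved count against the remaining excess; report progress.
  static bool save(unsigned &LeftToSave, unsigned NumRegs) {
    unsigned NumSaved = std::min(LeftToSave, NumRegs);
    LeftToSave -= NumSaved;
    return NumSaved != 0;
  }

  // Any excess left in any class?
  explicit operator bool() const { return SGPRs || ArchVGPRs || AGPRs; }
};

enum class RegKind { SGPR, AGPR, ArchVGPR };

// Mirrors the per-class dispatch: in this simplified model, saved registers
// only reduce the excess counter of their own class.
bool accountSavedRegs(ExcessModel &Excess, RegKind Kind, unsigned NumRegs) {
  switch (Kind) {
  case RegKind::SGPR:
    return ExcessModel::save(Excess.SGPRs, NumRegs);
  case RegKind::AGPR:
    return ExcessModel::save(Excess.AGPRs, NumRegs);
  case RegKind::ArchVGPR:
    return ExcessModel::save(Excess.ArchVGPRs, NumRegs);
  }
  return false;
}

As in the lambda, a region would drop out of the optimizable set once its model converts to false, i.e. once every per-class excess term has been driven to zero.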
@@ -1962,10 +1957,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
       if (!isTriviallyReMaterializable(DefMI))
         continue;
 
-      // We only support rematerializing virtual VGPRs with one definition.
+      // We only support rematerializing virtual registers with one definition.
       Register Reg = DefMI.getOperand(0).getReg();
-      if (!Reg.isVirtual() || !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
-          !DAG.MRI.hasOneDef(Reg))
+      if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
        continue;
 
       // We only care to rematerialize the instruction if it has a single
@@ -2003,14 +1997,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
           Rematerializations.try_emplace(&DefMI, UseMI).first->second;
 
       bool RematUseful = false;
+      const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
       if (auto It = OptRegions.find(I); It != OptRegions.end()) {
         // Optimistically consider that moving the instruction out of its
         // defining region will reduce RP in the latter; this assumes that
         // maximum RP in the region is reached somewhere between the defining
         // instruction and the end of the region.
         REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
         LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
-        if (ReduceRPInRegion(It, Mask, RematUseful))
+        if (ReduceRPInRegion(It, Mask, RC, RematUseful))
           return true;
       }

@@ -2030,7 +2025,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
         // instruction's use.
         if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
           REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
-          if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
+          if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
             return true;
         }
       }

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 0 additions & 2 deletions
@@ -444,8 +444,6 @@ class ClusteredLowOccStage : public GCNSchedStage {
 /// estimates reducing spilling or increasing occupancy is possible, as few
 /// instructions as possible are rematerialized to reduce potential negative
 /// effects on function latency.
-///
-/// TODO: We should extend this to work on SGPRs and AGPRs as well.
 class PreRARematStage : public GCNSchedStage {
 private:
   /// Useful information about a rematerializable instruction.
