Skip to content

Commit 06bf083

Browse files
committed
[AMDGPU] Fall back to SavedMutations when not applying IGLP
Change-Id: I4ef67afa5e68714231a01afb1e9f58dbddd07368
1 parent 1f4d9b3 commit 06bf083

File tree

4 files changed

+56
-30
lines changed

4 files changed

+56
-30
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,7 @@ class IGLPStrategy {
849849

850850
public:
851851
/// Add SchedGroups to \p SyncedSchedGroups to implement this Strategy.
852-
virtual void applyIGLPStrategy(
852+
virtual bool applyIGLPStrategy(
853853
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
854854
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
855855
IGLPPhase Phase) = 0;
@@ -868,7 +868,7 @@ class IGLPStrategy {
868868
class MFMASmallGemmOpt final : public IGLPStrategy {
869869
private:
870870
public:
871-
void applyIGLPStrategy(
871+
bool applyIGLPStrategy(
872872
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
873873
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
874874
IGLPPhase Phase) override;
@@ -881,7 +881,7 @@ class MFMASmallGemmOpt final : public IGLPStrategy {
881881
}
882882
};
883883

884-
void MFMASmallGemmOpt::applyIGLPStrategy(
884+
bool MFMASmallGemmOpt::applyIGLPStrategy(
885885
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
886886
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
887887
IGLPPhase Phase) {
@@ -902,6 +902,8 @@ void MFMASmallGemmOpt::applyIGLPStrategy(
902902
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
903903
SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
904904
}
905+
906+
return true;
905907
}
906908

907909
class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
@@ -1098,7 +1100,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
10981100
};
10991101

11001102
public:
1101-
void applyIGLPStrategy(
1103+
bool applyIGLPStrategy(
11021104
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
11031105
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
11041106
IGLPPhase Phase) override;
@@ -1115,7 +1117,7 @@ static unsigned DSWCount = 0;
11151117
static unsigned DSWWithPermCount = 0;
11161118
static unsigned DSWWithSharedVMEMCount = 0;
11171119

1118-
void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
1120+
bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
11191121
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
11201122
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
11211123
IGLPPhase Phase) {
@@ -1355,6 +1357,8 @@ void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
13551357
SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
13561358
SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
13571359
}
1360+
1361+
return true;
13581362
}
13591363

13601364
static std::unique_ptr<IGLPStrategy>
@@ -1376,6 +1380,8 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
13761380

13771381
ScheduleDAGMI *DAG;
13781382

1383+
// Optional list of mutations that apply() runs as a fallback when it does not
// solve a SCHED_[GROUP_]BARRIER / IGLP_OPT pipeline for the region. A null
// pointer disables the fallback. Default-initialized so that an instance built
// with the defaulted constructor never reads an indeterminate pointer.
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations = nullptr;
1384+
13791385
// Organize lists of SchedGroups by their SyncID. SchedGroups /
13801386
// SCHED_GROUP_BARRIERs with different SyncIDs will have no edges added
13811387
// between them.
@@ -1402,7 +1408,7 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
14021408
void initSchedGroupBarrierPipelineStage(
14031409
std::vector<SUnit>::reverse_iterator RIter);
14041410

1405-
void initIGLPOpt(SUnit &SU);
1411+
bool initIGLPOpt(SUnit &SU);
14061412

14071413
public:
14081414
void apply(ScheduleDAGInstrs *DAGInstrs) override;
@@ -1418,7 +1424,10 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
14181424
IGLPPhase Phase = IGLPPhase::Initial;
14191425

14201426
IGroupLPDAGMutation() = default;
1421-
IGroupLPDAGMutation(IGLPPhase Phase) : Phase(Phase) {}
1427+
IGroupLPDAGMutation(
1428+
IGLPPhase Phase,
1429+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations)
1430+
: SavedMutations(SavedMutations), Phase(Phase) {}
14221431
};
14231432

14241433
unsigned SchedGroup::NumSchedGroups = 0;
@@ -1609,31 +1618,41 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
16091618
DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
16101619
SyncedSchedGroups.clear();
16111620
SyncedInstrs.clear();
1612-
bool foundSB = false;
1613-
bool foundIGLP = false;
1621+
bool FoundSB = false;
1622+
bool FoundIGLP = false;
1623+
bool ShouldApplyIGLP = false;
16141624
for (auto R = DAG->SUnits.rbegin(), E = DAG->SUnits.rend(); R != E; ++R) {
16151625
unsigned Opc = R->getInstr()->getOpcode();
16161626
// SCHED_[GROUP_]BARRIER and IGLP are mutually exclusive.
16171627
if (Opc == AMDGPU::SCHED_BARRIER) {
16181628
addSchedBarrierEdges(*R);
1619-
foundSB = true;
1629+
FoundSB = true;
16201630
} else if (Opc == AMDGPU::SCHED_GROUP_BARRIER) {
16211631
initSchedGroupBarrierPipelineStage(R);
1622-
foundSB = true;
1632+
FoundSB = true;
16231633
} else if (Opc == AMDGPU::IGLP_OPT) {
16241634
resetEdges(*R, DAG);
1625-
if (!foundSB && !foundIGLP)
1626-
initIGLPOpt(*R);
1627-
foundIGLP = true;
1635+
if (!FoundSB && !FoundIGLP) {
1636+
FoundIGLP = true;
1637+
ShouldApplyIGLP = initIGLPOpt(*R);
1638+
}
16281639
}
16291640
}
16301641

1631-
if (foundSB || foundIGLP) {
1642+
if (FoundSB || (FoundIGLP && ShouldApplyIGLP)) {
16321643
PipelineSolver PS(SyncedSchedGroups, SyncedInstrs, DAG, IsBottomUp);
16331644
// PipelineSolver performs the mutation by adding the edges it
16341645
// determined as the best
16351646
PS.solve();
1647+
return;
16361648
}
1649+
1650+
if (!SavedMutations)
1651+
return;
1652+
1653+
// We did not apply a mutation, fall back to SavedMutations
1654+
for (auto &m : *SavedMutations)
1655+
m->apply(DAG);
16371656
}
16381657

16391658
void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
@@ -1712,14 +1731,15 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
17121731
SG.initSchedGroup(RIter, SyncedInstrs[SG.getSyncID()]);
17131732
}
17141733

1715-
void IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
1734+
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
17161735
IGLPStrategyID StrategyID =
17171736
(IGLPStrategyID)SU.getInstr()->getOperand(0).getImm();
17181737
auto S = createIGLPStrategy(StrategyID, DAG, TII);
1719-
if (S->shouldApplyStrategy(DAG)) {
1720-
IsBottomUp = S->IsBottomUp;
1721-
S->applyIGLPStrategy(SyncedInstrs, SyncedSchedGroups, Phase);
1722-
}
1738+
if (!S->shouldApplyStrategy(DAG))
1739+
return false;
1740+
1741+
IsBottomUp = S->IsBottomUp;
1742+
return S->applyIGLPStrategy(SyncedInstrs, SyncedSchedGroups, Phase);
17231743
}
17241744

17251745
} // namespace
@@ -1731,9 +1751,10 @@ namespace llvm {
17311751
/// same scheduling region (e.g. pre and post-RA scheduling / multiple
17321752
/// scheduling "phases"), we can reenter this mutation framework more than once
17331753
/// for a given region.
1734-
std::unique_ptr<ScheduleDAGMutation>
1735-
createIGroupLPDAGMutation(IGLPPhase Phase) {
1736-
return std::make_unique<IGroupLPDAGMutation>(Phase);
1754+
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
1755+
IGLPPhase Phase,
1756+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations) {
1757+
return std::make_unique<IGroupLPDAGMutation>(Phase, SavedMutations);
17371758
}
17381759

17391760
} // end namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@
1111

1212
#include "llvm/CodeGen/ScheduleDAGMutation.h"
1313
#include <memory>
14+
#include <vector>
1415

1516
namespace llvm {
1617

1718
// Components of the mask that determines which instruction types may be
1819
// classified into a SchedGroup.
1920
enum class IGLPPhase { Initial = 0u, PreRAReentry = 1u << 0, PostRA = 1u << 1 };
2021

21-
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(IGLPPhase Phase);
22+
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
23+
IGLPPhase Phase,
24+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations);
2225

2326
} // namespace llvm
2427

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
461461
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
462462
if (ST.shouldClusterStores())
463463
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
464-
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::Initial));
464+
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::Initial, nullptr));
465465
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
466466
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
467467
return DAG;
@@ -471,7 +471,7 @@ static ScheduleDAGInstrs *
471471
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
472472
ScheduleDAGMILive *DAG =
473473
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
474-
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::Initial));
474+
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::Initial, nullptr));
475475
return DAG;
476476
}
477477

@@ -934,7 +934,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
934934
if (ST.shouldClusterStores())
935935
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
936936
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
937-
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::PostRA));
937+
DAG->addMutation(createIGroupLPDAGMutation(IGLPPhase::PostRA, nullptr));
938938
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
939939
DAG->addMutation(createVOPDPairingMutation());
940940
return DAG;

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
713713
return false;
714714

715715
SavedMutations.swap(DAG.Mutations);
716-
DAG.addMutation(createIGroupLPDAGMutation(IGLPPhase::PreRAReentry));
716+
DAG.addMutation(createIGroupLPDAGMutation(IGLPPhase::PreRAReentry, nullptr));
717717

718718
InitialOccupancy = DAG.MinOccupancy;
719719
// Aggressively try to reduce register pressure in the unclustered high RP
@@ -856,7 +856,8 @@ bool GCNSchedStage::initGCNRegion() {
856856
bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
857857
StageID == GCNSchedStageID::ILPInitialSchedule;
858858
DAG.addMutation(createIGroupLPDAGMutation(
859-
IsInitialStage ? IGLPPhase::Initial : IGLPPhase::PreRAReentry));
859+
IsInitialStage ? IGLPPhase::Initial : IGLPPhase::PreRAReentry,
860+
&SavedMutations));
860861
}
861862

862863
return true;
@@ -1570,7 +1571,8 @@ void GCNPostScheduleDAGMILive::schedule() {
15701571
if (HasIGLPInstrs) {
15711572
SavedMutations.clear();
15721573
SavedMutations.swap(Mutations);
1573-
addMutation(createIGroupLPDAGMutation(/*IsReentry=*/IGLPPhase::PostRA));
1574+
addMutation(createIGroupLPDAGMutation(/*IsReentry=*/IGLPPhase::PostRA,
1575+
&SavedMutations));
15741576
}
15751577

15761578
ScheduleDAGMI::schedule();

0 commit comments

Comments
 (0)