Skip to content

Commit b5f132a

Browse files
jrbyrnes authored and bcahoon committed
[AMDGPU] Teach iterative schedulers about IGLP (llvm#134953)
This adds the IGLP mutation to the iterative schedulers (`gcn-iterative-max-occupancy-experimental`, `gcn-iterative-minreg`, and `gcn-iterative-ilp`). The `gcn-iterative-minreg` and `gcn-iterative-ilp` schedulers never actually applied the mutations that were added to them, so this change also has the effect of teaching them about mutations in general. The `gcn-iterative-max-occupancy-experimental` scheduler calls `ScheduleDAGMILive::schedule()`, so, before this change, mutations were applied at that point. Now they are applied during calls to `BuildDAG`, with IGLP superseding other mutations (similar to the other schedulers). We may end up scheduling regions multiple times, with mutations being applied each time, so we need to track whether a region is being re-scheduled and use `AMDGPU::SchedulingPhase::PreRAReentry` in that case. (Cherry picked from commit 241c519.)
1 parent 7101fb3 commit b5f132a

File tree

8 files changed

+1005
-26
lines changed

8 files changed

+1005
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,12 +604,15 @@ createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
604604
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
605605
if (ST.shouldClusterStores())
606606
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
607+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
607608
return DAG;
608609
}
609610

610611
static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
611-
return new GCNIterativeScheduler(C,
612-
GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
612+
auto *DAG = new GCNIterativeScheduler(
613+
C, GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
614+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
615+
return DAG;
613616
}
614617

615618
static ScheduleDAGInstrs *
@@ -620,6 +623,7 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
620623
if (ST.shouldClusterStores())
621624
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
622625
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
626+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
623627
return DAG;
624628
}
625629

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "GCNIterativeScheduler.h"
15+
#include "AMDGPUIGroupLP.h"
1516
#include "GCNSchedStrategy.h"
1617
#include "SIMachineFunctionInfo.h"
1718

@@ -118,21 +119,42 @@ void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
118119
}
119120
#endif
120121

122+
// Make the IGroupLP mutation the only active DAG mutation when region \p R
// contains an instruction for which SIInstrInfo::isIGLPMutationOnly() returns
// true. Regions without such an instruction leave the mutation list untouched.
//
// \p R         Region whose instructions [R.Begin, R.End) are scanned.
// \p IsReentry Selects the scheduling phase handed to the IGroupLP mutation:
//              PreRAReentry when true, Initial otherwise.
//
// NOTE(review): this function only moves the previous mutations into
// SavedMutations; nothing here moves them back. Because SavedMutations is
// cleared before the swap, a second call without an intervening restore would
// destroy the originally saved mutations -- confirm the restore path with the
// callers.
void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) {
123+
bool HasIGLPInstrs = false;
124+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII);
125+
// Stop at the first matching instruction; a single one is enough to switch
// the region over to IGLP-only mutations.
for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) {
126+
if (SII->isIGLPMutationOnly(I->getOpcode())) {
127+
HasIGLPInstrs = true;
128+
break;
129+
}
130+
}
131+
132+
if (HasIGLPInstrs) {
133+
// Drop whatever was saved earlier, then move the active mutations into
// SavedMutations. Mutations is empty after the swap.
SavedMutations.clear();
134+
SavedMutations.swap(Mutations);
135+
auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry
136+
: AMDGPU::SchedulingPhase::Initial;
137+
138+
// The IGroupLP mutation is now the only entry in Mutations.
addMutation(createIGroupLPDAGMutation(SchedPhase));
139+
}
140+
}
141+
121142
// DAG builder helper
122143
class GCNIterativeScheduler::BuildDAG {
123144
GCNIterativeScheduler &Sch;
124145
SmallVector<SUnit *, 8> TopRoots;
125146

126147
SmallVector<SUnit*, 8> BotRoots;
127148
public:
128-
BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
129-
: Sch(_Sch) {
149+
BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false)
150+
: Sch(_Sch) {
130151
auto *BB = R.Begin->getParent();
131152
Sch.BaseClass::startBlock(BB);
132153
Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
133-
154+
Sch.swapIGLPMutations(R, IsReentry);
134155
Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
135156
/*TrackLaneMask*/true);
157+
Sch.postProcessDAG();
136158
Sch.Topo.InitDAGTopologicalSorting();
137159
Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
138160
}
@@ -432,13 +454,15 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
432454

433455
auto NewOcc = TargetOcc;
434456
for (auto *R : Regions) {
457+
// Always build the DAG to add mutations
458+
BuildDAG DAG(*R, *this);
459+
435460
if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
436-
break;
461+
continue;
437462

438463
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
439464
printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
440465

441-
BuildDAG DAG(*R, *this);
442466
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
443467
const auto MaxRP = getSchedulePressure(*R, MinSchedule);
444468
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
@@ -469,8 +493,11 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
469493
sortRegionsByPressure(TgtOcc);
470494
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
471495

472-
if (TryMaximizeOccupancy && Occ < TgtOcc)
496+
bool IsReentry = false;
497+
if (TryMaximizeOccupancy && Occ < TgtOcc) {
473498
Occ = tryMaximizeOccupancy(TgtOcc);
499+
IsReentry = true;
500+
}
474501

475502
// This is really weird but for some magic scheduling regions twice
476503
// gives performance improvement
@@ -489,7 +516,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
489516
LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
490517
for (auto *R : Regions) {
491518
OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
492-
519+
IsReentry |= I > 0;
520+
swapIGLPMutations(*R, IsReentry);
493521
Ovr.schedule();
494522
const auto RP = getRegionPressure(*R);
495523
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
@@ -556,8 +584,11 @@ void GCNIterativeScheduler::scheduleILP(
556584
sortRegionsByPressure(TgtOcc);
557585
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
558586

559-
if (TryMaximizeOccupancy && Occ < TgtOcc)
587+
bool IsReentry = false;
588+
if (TryMaximizeOccupancy && Occ < TgtOcc) {
560589
Occ = tryMaximizeOccupancy(TgtOcc);
590+
IsReentry = true;
591+
}
561592

562593
TgtOcc = std::min(Occ, TgtOcc);
563594
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
@@ -566,7 +597,7 @@ void GCNIterativeScheduler::scheduleILP(
566597

567598
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
568599
for (auto *R : Regions) {
569-
BuildDAG DAG(*R, *this);
600+
BuildDAG DAG(*R, *this, IsReentry);
570601
const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
571602

572603
const auto RP = getSchedulePressure(*R, ILPSchedule);

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
7777
const StrategyKind Strategy;
7878
mutable GCNUpwardRPTracker UPTracker;
7979

80+
std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
81+
8082
class BuildDAG;
8183
class OverrideLegacyStrategy;
8284

@@ -91,6 +93,7 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
9193
return getRegionPressure(R.Begin, R.End);
9294
}
9395

96+
void swapIGLPMutations(const Region &R, bool IsReentry);
9497
void setBestSchedule(Region &R,
9598
ScheduleRef Schedule,
9699
const GCNRegPressure &MaxRP = GCNRegPressure());

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,6 @@ static void getRegisterPressures(
188188
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
189189
}
190190

191-
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
192-
// mutations, requiring all other mutations to be disabled.
193-
static bool isIGLPMutationOnly(unsigned Opcode) {
194-
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
195-
}
196-
197191
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
198192
bool AtTop,
199193
const RegPressureTracker &RPTracker,
@@ -1155,9 +1149,10 @@ bool GCNSchedStage::initGCNRegion() {
11551149
Unsched.reserve(DAG.NumRegionInstrs);
11561150
if (StageID == GCNSchedStageID::OccInitialSchedule ||
11571151
StageID == GCNSchedStageID::ILPInitialSchedule) {
1152+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
11581153
for (auto &I : DAG) {
11591154
Unsched.push_back(&I);
1160-
if (isIGLPMutationOnly(I.getOpcode()))
1155+
if (SII->isIGLPMutationOnly(I.getOpcode()))
11611156
DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
11621157
}
11631158
} else {
@@ -1905,8 +1900,9 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
19051900
}
19061901

19071902
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1908-
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
1909-
return isIGLPMutationOnly(MI->getOpcode());
1903+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
1904+
return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
1905+
return SII->isIGLPMutationOnly(MI->getOpcode());
19101906
});
19111907
}
19121908

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
993993

994994
bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
995995

996+
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
997+
// mutations, requiring all other mutations to be disabled.
998+
bool isIGLPMutationOnly(unsigned Opcode) const {
999+
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
1000+
}
1001+
9961002
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
9971003
switch (Opcode) {
9981004
case AMDGPU::S_WAITCNT_soft:

llvm/test/CodeGen/AMDGPU/iglp.opt.reentry.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 < %s | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-max-occupancy-experimental < %s | FileCheck %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-ilp < %s | FileCheck %s
24

35
; Test should not result in build failure
46
; CHECK-LABEL: shouldNotReApply

0 commit comments

Comments
 (0)