Skip to content

Commit 2f59ea2

Browse files
committed
Revert "MachineScheduler: Improve instruction clustering (llvm#137784)"
This reverts commit 0487db1.
1 parent c28487e commit 2f59ea2

File tree

104 files changed

+25830
-26016
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+25830
-26016
lines changed

llvm/include/llvm/CodeGen/MachineScheduler.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs {
304304
/// The bottom of the unscheduled zone.
305305
MachineBasicBlock::iterator CurrentBottom;
306306

307+
/// Record the next node in a scheduled cluster.
308+
const SUnit *NextClusterPred = nullptr;
309+
const SUnit *NextClusterSucc = nullptr;
310+
307311
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
308312
/// The number of instructions scheduled so far. Used to cut off the
309313
/// scheduler at the point determined by misched-cutoff.
@@ -364,6 +368,10 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs {
364368
/// live ranges and region boundary iterators.
365369
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
366370

371+
const SUnit *getNextClusterPred() const { return NextClusterPred; }
372+
373+
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
374+
367375
void viewGraph(const Twine &Name, const Twine &Title) override;
368376
void viewGraph() override;
369377

@@ -1287,9 +1295,6 @@ class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
12871295
SchedBoundary Top;
12881296
SchedBoundary Bot;
12891297

1290-
ClusterInfo *TopCluster;
1291-
ClusterInfo *BotCluster;
1292-
12931298
/// Candidate last picked from Top boundary.
12941299
SchedCandidate TopCand;
12951300
/// Candidate last picked from Bot boundary.
@@ -1330,9 +1335,6 @@ class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
13301335
/// Candidate last picked from Bot boundary.
13311336
SchedCandidate BotCand;
13321337

1333-
ClusterInfo *TopCluster;
1334-
ClusterInfo *BotCluster;
1335-
13361338
public:
13371339
PostGenericScheduler(const MachineSchedContext *C)
13381340
: GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),

llvm/include/llvm/CodeGen/ScheduleDAG.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#include "llvm/ADT/BitVector.h"
1919
#include "llvm/ADT/PointerIntPair.h"
20-
#include "llvm/ADT/SmallSet.h"
2120
#include "llvm/ADT/SmallVector.h"
2221
#include "llvm/ADT/iterator.h"
2322
#include "llvm/CodeGen/MachineInstr.h"
@@ -236,10 +235,6 @@ class TargetRegisterInfo;
236235
LLVM_ABI void dump(const TargetRegisterInfo *TRI = nullptr) const;
237236
};
238237

239-
/// Keep record of which SUnit are in the same cluster group.
240-
typedef SmallSet<SUnit *, 8> ClusterInfo;
241-
constexpr unsigned InvalidClusterId = ~0u;
242-
243238
/// Scheduling unit. This is a node in the scheduling DAG.
244239
class SUnit {
245240
private:
@@ -280,8 +275,6 @@ class TargetRegisterInfo;
280275
unsigned TopReadyCycle = 0; ///< Cycle relative to start when node is ready.
281276
unsigned BotReadyCycle = 0; ///< Cycle relative to end when node is ready.
282277

283-
unsigned ParentClusterIdx = InvalidClusterId; ///< The parent cluster id.
284-
285278
private:
286279
unsigned Depth = 0; ///< Node depth.
287280
unsigned Height = 0; ///< Node height.

llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,6 @@ namespace llvm {
181181
/// case of a huge region that gets reduced).
182182
SUnit *BarrierChain = nullptr;
183183

184-
SmallVector<ClusterInfo> Clusters;
185-
186184
public:
187185
/// A list of SUnits, used in Value2SUsMap, during DAG construction.
188186
/// Note: to gain speed it might be worth investigating an optimized
@@ -386,14 +384,6 @@ namespace llvm {
386384
/// equivalent edge already existed (false indicates failure).
387385
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
388386

389-
/// Returns the array of the clusters.
390-
SmallVector<ClusterInfo> &getClusters() { return Clusters; }
391-
392-
/// Get the specific cluster, return nullptr for InvalidClusterId.
393-
ClusterInfo *getCluster(unsigned Idx) {
394-
return Idx != InvalidClusterId ? &Clusters[Idx] : nullptr;
395-
}
396-
397387
protected:
398388
void initSUnits();
399389
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "llvm/ADT/ArrayRef.h"
1616
#include "llvm/ADT/BitVector.h"
1717
#include "llvm/ADT/DenseMap.h"
18-
#include "llvm/ADT/EquivalenceClasses.h"
1918
#include "llvm/ADT/PriorityQueue.h"
2019
#include "llvm/ADT/STLExtras.h"
2120
#include "llvm/ADT/SmallVector.h"
@@ -944,6 +943,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
944943

945944
if (SuccEdge->isWeak()) {
946945
--SuccSU->WeakPredsLeft;
946+
if (SuccEdge->isCluster())
947+
NextClusterSucc = SuccSU;
947948
return;
948949
}
949950
#ifndef NDEBUG
@@ -966,6 +967,12 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
966967

967968
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
968969
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
970+
// Reset the next successor, For example, we want to cluster A B C.
971+
// After A is picked, we will set B as next cluster succ, but if we pick
972+
// D instead of B after A, then we need to reset the next cluster succ because
973+
// we have decided to not pick the cluster candidate B during pickNode().
974+
// Leaving B as the NextClusterSucc just make things messy.
975+
NextClusterSucc = nullptr;
969976
for (SDep &Succ : SU->Succs)
970977
releaseSucc(SU, &Succ);
971978
}
@@ -979,6 +986,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
979986

980987
if (PredEdge->isWeak()) {
981988
--PredSU->WeakSuccsLeft;
989+
if (PredEdge->isCluster())
990+
NextClusterPred = PredSU;
982991
return;
983992
}
984993
#ifndef NDEBUG
@@ -1001,6 +1010,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
10011010

10021011
/// releasePredecessors - Call releasePred on each of SU's predecessors.
10031012
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
1013+
NextClusterPred = nullptr;
10041014
for (SDep &Pred : SU->Preds)
10051015
releasePred(SU, &Pred);
10061016
}
@@ -1174,8 +1184,11 @@ findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
11741184
}
11751185

11761186
/// Identify DAG roots and setup scheduler queues.
1177-
void ScheduleDAGMI::initQueues(ArrayRef<SUnit *> TopRoots,
1178-
ArrayRef<SUnit *> BotRoots) {
1187+
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
1188+
ArrayRef<SUnit*> BotRoots) {
1189+
NextClusterSucc = nullptr;
1190+
NextClusterPred = nullptr;
1191+
11791192
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
11801193
//
11811194
// Nodes with unreleased weak edges can still be roots.
@@ -2103,7 +2116,6 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
21032116
ScheduleDAGInstrs *DAG) {
21042117
// Keep track of the current cluster length and bytes for each SUnit.
21052118
DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
2106-
EquivalenceClasses<SUnit *> Clusters;
21072119

21082120
// At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
21092121
// cluster mem ops collected within `MemOpRecords` array.
@@ -2143,15 +2155,13 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
21432155

21442156
SUnit *SUa = MemOpa.SU;
21452157
SUnit *SUb = MemOpb.SU;
2146-
21472158
if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
21482159
std::swap(SUa, SUb);
21492160

21502161
// FIXME: Is this check really required?
21512162
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
21522163
continue;
21532164

2154-
Clusters.unionSets(SUa, SUb);
21552165
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
21562166
<< SUb->NodeNum << ")\n");
21572167
++NumClustered;
@@ -2191,21 +2201,6 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
21912201
<< ", Curr cluster bytes: " << CurrentClusterBytes
21922202
<< "\n");
21932203
}
2194-
2195-
// Add cluster group information.
2196-
// Iterate over all of the equivalence sets.
2197-
auto &AllClusters = DAG->getClusters();
2198-
for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) {
2199-
if (!I->isLeader())
2200-
continue;
2201-
ClusterInfo Group;
2202-
unsigned ClusterIdx = AllClusters.size();
2203-
for (SUnit *MemberI : Clusters.members(*I)) {
2204-
MemberI->ParentClusterIdx = ClusterIdx;
2205-
Group.insert(MemberI);
2206-
}
2207-
AllClusters.push_back(Group);
2208-
}
22092204
}
22102205

22112206
void BaseMemOpClusterMutation::collectMemOpRecords(
@@ -3693,9 +3688,6 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
36933688
}
36943689
TopCand.SU = nullptr;
36953690
BotCand.SU = nullptr;
3696-
3697-
TopCluster = nullptr;
3698-
BotCluster = nullptr;
36993691
}
37003692

37013693
/// Initialize the per-region scheduling policy.
@@ -4005,11 +3997,13 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
40053997
// This is a best effort to set things up for a post-RA pass. Optimizations
40063998
// like generating loads of multiple registers should ideally be done within
40073999
// the scheduler pass by combining the loads during DAG postprocessing.
4008-
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
4009-
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
4010-
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
4011-
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
4012-
Cluster))
4000+
const SUnit *CandNextClusterSU =
4001+
Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4002+
const SUnit *TryCandNextClusterSU =
4003+
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4004+
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
4005+
Cand.SU == CandNextClusterSU,
4006+
TryCand, Cand, Cluster))
40134007
return TryCand.Reason != NoCand;
40144008

40154009
if (SameBoundary) {
@@ -4268,25 +4262,11 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
42684262
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
42694263
if (IsTopNode) {
42704264
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4271-
TopCluster = DAG->getCluster(SU->ParentClusterIdx);
4272-
LLVM_DEBUG(if (TopCluster) {
4273-
dbgs() << " Top Cluster: ";
4274-
for (auto *N : *TopCluster)
4275-
dbgs() << N->NodeNum << '\t';
4276-
dbgs() << '\n';
4277-
});
42784265
Top.bumpNode(SU);
42794266
if (SU->hasPhysRegUses)
42804267
reschedulePhysReg(SU, true);
42814268
} else {
42824269
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4283-
BotCluster = DAG->getCluster(SU->ParentClusterIdx);
4284-
LLVM_DEBUG(if (BotCluster) {
4285-
dbgs() << " Bot Cluster: ";
4286-
for (auto *N : *BotCluster)
4287-
dbgs() << N->NodeNum << '\t';
4288-
dbgs() << '\n';
4289-
});
42904270
Bot.bumpNode(SU);
42914271
if (SU->hasPhysRegDefs)
42924272
reschedulePhysReg(SU, false);
@@ -4323,8 +4303,6 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
43234303
if (!Bot.HazardRec) {
43244304
Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
43254305
}
4326-
TopCluster = nullptr;
4327-
BotCluster = nullptr;
43284306
}
43294307

43304308
void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
@@ -4389,12 +4367,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
43894367
return TryCand.Reason != NoCand;
43904368

43914369
// Keep clustered nodes together.
4392-
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
4393-
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
4394-
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
4395-
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
4396-
Cluster))
4370+
const SUnit *CandNextClusterSU =
4371+
Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4372+
const SUnit *TryCandNextClusterSU =
4373+
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4374+
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
4375+
Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
43974376
return TryCand.Reason != NoCand;
4377+
43984378
// Avoid critical resource consumption and balance the schedule.
43994379
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
44004380
TryCand, Cand, ResourceReduce))
@@ -4591,11 +4571,9 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
45914571
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
45924572
if (IsTopNode) {
45934573
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4594-
TopCluster = DAG->getCluster(SU->ParentClusterIdx);
45954574
Top.bumpNode(SU);
45964575
} else {
45974576
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4598-
BotCluster = DAG->getCluster(SU->ParentClusterIdx);
45994577
Bot.bumpNode(SU);
46004578
}
46014579
}

llvm/lib/CodeGen/MacroFusion.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,6 @@ bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
6161
for (SDep &SI : SecondSU.Preds)
6262
if (SI.isCluster())
6363
return false;
64-
65-
unsigned FirstCluster = FirstSU.ParentClusterIdx;
66-
unsigned SecondCluster = SecondSU.ParentClusterIdx;
67-
assert(FirstCluster == InvalidClusterId && SecondCluster == InvalidClusterId);
68-
6964
// Though the reachability checks above could be made more generic,
7065
// perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
7166
// the extra computation cost makes it less interesting in general cases.
@@ -75,14 +70,6 @@ bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
7570
if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
7671
return false;
7772

78-
auto &Clusters = DAG.getClusters();
79-
80-
FirstSU.ParentClusterIdx = Clusters.size();
81-
SecondSU.ParentClusterIdx = Clusters.size();
82-
83-
SmallSet<SUnit *, 8> Cluster{{&FirstSU, &SecondSU}};
84-
Clusters.push_back(Cluster);
85-
8673
// TODO - If we want to chain more than two instructions, we need to create
8774
// artifical edges to make dependencies from the FirstSU also dependent
8875
// on other chained instructions, and other chained instructions also

llvm/lib/CodeGen/ScheduleDAG.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,6 @@ LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
365365
LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
366366
dumpNode(SU);
367367
SU.dumpAttributes();
368-
if (SU.ParentClusterIdx != InvalidClusterId)
369-
dbgs() << " Parent Cluster Index: " << SU.ParentClusterIdx << '\n';
370-
371368
if (SU.Preds.size() > 0) {
372369
dbgs() << " Predecessors:\n";
373370
for (const SDep &Dep : SU.Preds) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -589,11 +589,12 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
589589
// This is a best effort to set things up for a post-RA pass. Optimizations
590590
// like generating loads of multiple registers should ideally be done within
591591
// the scheduler pass by combining the loads during DAG postprocessing.
592-
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
593-
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
594-
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
595-
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
596-
Cluster))
592+
const SUnit *CandNextClusterSU =
593+
Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
594+
const SUnit *TryCandNextClusterSU =
595+
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
596+
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
597+
Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
597598
return TryCand.Reason != NoCand;
598599

599600
// Avoid increasing the max critical pressure in the scheduled region.
@@ -663,11 +664,12 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
663664

664665
// MaxMemoryClause-specific: We prioritize clustered instructions as we would
665666
// get more benefit from clausing these memory instructions.
666-
const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
667-
const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
668-
if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
669-
CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
670-
Cluster))
667+
const SUnit *CandNextClusterSU =
668+
Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
669+
const SUnit *TryCandNextClusterSU =
670+
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
671+
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
672+
Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
671673
return TryCand.Reason != NoCand;
672674

673675
// We only compare a subset of features when comparing nodes between

0 commit comments

Comments
 (0)