Skip to content

Commit 280ac1f

Browse files
committed
[MCA] Refactor class LSUnit. NFCI
This should be the last bit of refactoring in preparation for a patch that would finally fix PR37494. This patch introduces the concept of memory dependency groups (class MemoryGroup) and "Load/Store Unit token" (LSUToken) to track the status of a memory operation. A MemoryGroup is a node of a memory dependency graph. It is used internally to classify memory operations based on the memory operations they depend on. Let I and J be two memory operations, we say that I and J are equivalent (for the purpose of mapping instructions to memory dependency groups) if the set of memory operations they depend on is identical. MemoryGroups are identified by so-called LSUToken (a unique group identifier assigned by the LSUnit to every group). When an instruction I is dispatched to the LSUnit, the LSUnit maps I to a group, and then returns a LSUToken. LSUTokens are used by class Scheduler to track memory dependencies. This patch simplifies the LSUnit interface and moves most of the implementation details to its base class (LSUnitBase). There is no user visible change to the output. llvm-svn: 361950
1 parent a6fb183 commit 280ac1f

File tree

5 files changed

+385
-255
lines changed

5 files changed

+385
-255
lines changed

llvm/include/llvm/MCA/HardwareUnits/LSUnit.h

Lines changed: 223 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
#ifndef LLVM_MCA_LSUNIT_H
1616
#define LLVM_MCA_LSUNIT_H
1717

18-
#include "llvm/ADT/SmallSet.h"
18+
#include "llvm/ADT/DenseMap.h"
19+
#include "llvm/ADT/SmallVector.h"
1920
#include "llvm/MC/MCSchedule.h"
2021
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
2122
#include "llvm/MCA/Instruction.h"
@@ -25,6 +26,143 @@ namespace mca {
2526

2627
class Scheduler;
2728

29+
/// A node of a memory dependency graph. A MemoryGroup describes a set of
30+
/// instructions with same memory dependencies.
31+
///
32+
/// By construction, instructions of a MemoryGroup don't depend on each other.
33+
/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
34+
/// A Memory group identifier is then stored as a "token" in field
35+
/// Instruction::LSUTokenID of each dispatched instructions. That token is used
36+
/// internally by the LSUnit to track memory dependencies.
37+
class MemoryGroup {
38+
unsigned NumPredecessors;
39+
unsigned NumExecutingPredecessors;
40+
unsigned NumExecutedPredecessors;
41+
42+
unsigned NumInstructions;
43+
unsigned NumExecuting;
44+
unsigned NumExecuted;
45+
SmallVector<MemoryGroup *, 4> Succ;
46+
47+
CriticalDependency CriticalPredecessor;
48+
InstRef CriticalMemoryInstruction;
49+
50+
MemoryGroup(const MemoryGroup &) = delete;
51+
MemoryGroup &operator=(const MemoryGroup &) = delete;
52+
53+
public:
54+
MemoryGroup()
55+
: NumPredecessors(0), NumExecutingPredecessors(0),
56+
NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
57+
NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
58+
MemoryGroup(MemoryGroup &&) = default;
59+
60+
ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
61+
unsigned getNumSuccessors() const { return Succ.size(); }
62+
unsigned getNumPredecessors() const { return NumPredecessors; }
63+
unsigned getNumExecutingPredecessors() const {
64+
return NumExecutingPredecessors;
65+
}
66+
unsigned getNumExecutedPredecessors() const {
67+
return NumExecutedPredecessors;
68+
}
69+
unsigned getNumInstructions() const { return NumInstructions; }
70+
unsigned getNumExecuting() const { return NumExecuting; }
71+
unsigned getNumExecuted() const { return NumExecuted; }
72+
73+
const InstRef &getCriticalMemoryInstruction() const {
74+
return CriticalMemoryInstruction;
75+
}
76+
const CriticalDependency &getCriticalPredecessor() const {
77+
return CriticalPredecessor;
78+
}
79+
80+
void addSuccessor(MemoryGroup *Group) {
81+
Group->NumPredecessors++;
82+
assert(!isExecuted() && "Should have been removed!");
83+
if (isExecuting())
84+
Group->onGroupIssued(CriticalMemoryInstruction);
85+
Succ.emplace_back(Group);
86+
}
87+
88+
bool isWaiting() const {
89+
return NumPredecessors >
90+
(NumExecutingPredecessors + NumExecutedPredecessors);
91+
}
92+
bool isPending() const {
93+
return NumExecutingPredecessors &&
94+
((NumExecutedPredecessors + NumExecutingPredecessors) ==
95+
NumPredecessors);
96+
}
97+
bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
98+
bool isExecuting() const {
99+
return NumExecuting == NumInstructions - NumExecuted;
100+
}
101+
bool isExecuted() const { return NumInstructions == NumExecuted; }
102+
103+
void onGroupIssued(const InstRef &IR) {
104+
assert(!isReady() && "Unexpected group-start event!");
105+
NumExecutingPredecessors++;
106+
107+
unsigned Cycles = IR.getInstruction()->getCyclesLeft();
108+
if (CriticalPredecessor.Cycles < Cycles) {
109+
CriticalPredecessor.IID = IR.getSourceIndex();
110+
CriticalPredecessor.Cycles = Cycles;
111+
}
112+
}
113+
114+
void onGroupExecuted() {
115+
assert(!isReady() && "Inconsistent state found!");
116+
NumExecutingPredecessors--;
117+
NumExecutedPredecessors++;
118+
}
119+
120+
void onInstructionIssued(const InstRef &IR) {
121+
assert(!isExecuting() && "Invalid internal state!");
122+
++NumExecuting;
123+
124+
// update the CriticalMemDep.
125+
const Instruction &IS = *IR.getInstruction();
126+
if ((bool)CriticalMemoryInstruction) {
127+
const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
128+
if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
129+
CriticalMemoryInstruction = IR;
130+
} else {
131+
CriticalMemoryInstruction = IR;
132+
}
133+
134+
if (!isExecuting())
135+
return;
136+
137+
// Notify successors that this group started execution.
138+
for (MemoryGroup *MG : Succ)
139+
MG->onGroupIssued(CriticalMemoryInstruction);
140+
}
141+
142+
void onInstructionExecuted() {
143+
assert(isReady() && !isExecuted() && "Invalid internal state!");
144+
--NumExecuting;
145+
++NumExecuted;
146+
147+
if (!isExecuted())
148+
return;
149+
150+
// Notify successors that this group has finished execution.
151+
for (MemoryGroup *MG : Succ)
152+
MG->onGroupExecuted();
153+
}
154+
155+
void addInstruction() {
156+
assert(!getNumSuccessors() && "Cannot add instructions to this group!");
157+
++NumInstructions;
158+
}
159+
160+
void cycleEvent() {
161+
if (CriticalPredecessor.Cycles)
162+
CriticalPredecessor.Cycles--;
163+
}
164+
};
165+
28166
/// Abstract base interface for LS (load/store) units in llvm-mca.
29167
class LSUnitBase : public HardwareUnit {
30168
/// Load queue size.
@@ -43,13 +181,20 @@ class LSUnitBase : public HardwareUnit {
43181
/// llvm/Target/TargetSchedule.td).
44182
unsigned SQSize;
45183

184+
unsigned UsedLQEntries;
185+
unsigned UsedSQEntries;
186+
46187
/// True if loads don't alias with stores.
47188
///
48189
/// By default, the LS unit assumes that loads and stores don't alias with
49190
/// each other. If this field is set to false, then loads are always assumed to
50191
/// alias with stores.
51192
const bool NoAlias;
52193

194+
/// Used to map group identifiers to MemoryGroups.
195+
DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
196+
unsigned NextGroupID;
197+
53198
public:
54199
LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
55200
unsigned StoreQueueSize, bool AssumeNoAlias);
@@ -62,6 +207,11 @@ class LSUnitBase : public HardwareUnit {
62207
/// Returns the total number of entries in the store queue.
63208
unsigned getStoreQueueSize() const { return SQSize; }
64209

210+
unsigned getUsedLQEntries() const { return UsedLQEntries; }
211+
unsigned getUsedSQEntries() const { return UsedSQEntries; }
212+
unsigned assignLQSlot() { return UsedLQEntries++; }
213+
unsigned assignSQSlot() { return UsedSQEntries++; }
214+
65215
bool assumeNoAlias() const { return NoAlias; }
66216

67217
enum Status {
@@ -81,25 +231,71 @@ class LSUnitBase : public HardwareUnit {
81231
///
82232
/// This method assumes that a previous call to `isAvailable(IR)` succeeded
83233
/// with a LSUnitBase::Status value of LSU_AVAILABLE.
84-
virtual void dispatch(const InstRef &IR) = 0;
234+
/// Returns the GroupID associated with this instruction. That value will be
235+
/// used to set the LSUTokenID field in class Instruction.
236+
virtual unsigned dispatch(const InstRef &IR) = 0;
237+
238+
bool isSQEmpty() const { return !UsedSQEntries; }
239+
bool isLQEmpty() const { return !UsedLQEntries; }
240+
bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
241+
bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
242+
243+
bool isValidGroupID(unsigned Index) const {
244+
return Index && (Groups.find(Index) != Groups.end());
245+
}
85246

86247
/// Check if a previously dispatched instruction IR is now ready for execution.
87-
///
88-
/// Instruction IR is assumed to be a memory operation. If IR is still waiting
89-
/// on another memory instruction M, then M is returned to the caller. If IR
90-
/// depends on more than one memory operations, then this method returns one
91-
/// of them.
92-
///
93-
/// Derived classes can implement memory consistency rules for simulated
94-
/// processor within this member function.
95-
virtual const InstRef &isReady(const InstRef &IR) const = 0;
248+
bool isReady(const InstRef &IR) const {
249+
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
250+
assert(isValidGroupID(GroupID) &&
251+
"Invalid group associated with this instruction!");
252+
const MemoryGroup &Group = *Groups.find(GroupID)->second;
253+
return Group.isReady();
254+
}
255+
256+
/// Check if a previously dispatched instruction IR only depends on
257+
/// instructions that are currently executing.
258+
bool isPending(const InstRef &IR) const {
259+
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
260+
assert(isValidGroupID(GroupID) &&
261+
"Invalid group associated with this instruction!");
262+
const MemoryGroup &Group = *Groups.find(GroupID)->second;
263+
return Group.isPending();
264+
}
265+
266+
const MemoryGroup &getGroup(unsigned Index) const {
267+
assert(isValidGroupID(Index) && "Group doesn't exist!");
268+
return *Groups.find(Index)->second;
269+
}
270+
271+
MemoryGroup &getGroup(unsigned Index) {
272+
assert(isValidGroupID(Index) && "Group doesn't exist!");
273+
return *Groups.find(Index)->second;
274+
}
275+
276+
unsigned createMemoryGroup() {
277+
Groups.insert(std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
278+
return NextGroupID++;
279+
}
280+
281+
// Instruction executed event handlers.
282+
virtual void onInstructionExecuted(const InstRef &IR);
283+
284+
virtual void onInstructionIssued(const InstRef &IR) {
285+
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
286+
Groups[GroupID]->onInstructionIssued(IR);
287+
}
288+
289+
virtual void cycleEvent();
290+
291+
#ifndef NDEBUG
292+
void dump() const;
293+
#endif
96294
};
97295

98-
/// A Load/Store Unit implementing a load and store queues.
296+
/// Default Load/Store Unit (LS Unit) for simulated processors.
99297
///
100-
/// This class implements a load queue and a store queue to emulate the
101-
/// out-of-order execution of memory operations.
102-
/// Each load (or store) consumes an entry in the load (or store) queue.
298+
/// Each load (or store) consumes one entry in the load (or store) queue.
103299
///
104300
/// Rules are:
105301
/// 1) A younger load is allowed to pass an older load only if there are no
@@ -159,14 +355,6 @@ class LSUnitBase : public HardwareUnit {
159355
/// the load/store queue(s). That also means, all the older loads/stores have
160356
/// already been executed.
161357
class LSUnit : public LSUnitBase {
162-
// When a `MayLoad` instruction is dispatched to the schedulers for execution,
163-
// the LSUnit reserves an entry in the `LoadQueue` for it.
164-
//
165-
// LoadQueue keeps track of all the loads that are in-flight. A load
166-
// instruction is eventually removed from the LoadQueue when it reaches
167-
// completion stage. That means, a load leaves the queue whe it is 'executed',
168-
// and its value can be forwarded on the data path to outside units.
169-
//
170358
// This class doesn't know about the latency of a load instruction. So, it
171359
// conservatively/pessimistically assumes that the latency of a load opcode
172360
// matches the instruction latency.
@@ -197,42 +385,26 @@ class LSUnit : public LSUnitBase {
197385
// alternative approaches that let instructions specify the number of
198386
// load/store queue entries which they consume at dispatch stage (See
199387
// PR39830).
200-
SmallSet<InstRef, 16> LoadQueue;
201-
SmallSet<InstRef, 16> StoreQueue;
202-
203-
void assignLQSlot(const InstRef &IR);
204-
void assignSQSlot(const InstRef &IR);
205-
388+
//
206389
// An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
207390
// conservatively treated as a store barrier. It forces older store to be
208391
// executed before newer stores are issued.
209-
SmallSet<InstRef, 8> StoreBarriers;
210-
392+
//
211393
// An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
212394
// conservatively treated as a load barrier. It forces older loads to execute
213395
// before newer loads are issued.
214-
SmallSet<InstRef, 8> LoadBarriers;
215-
216-
bool isSQEmpty() const { return StoreQueue.empty(); }
217-
bool isLQEmpty() const { return LoadQueue.empty(); }
218-
bool isSQFull() const {
219-
return getStoreQueueSize() != 0 && StoreQueue.size() == getStoreQueueSize();
220-
}
221-
bool isLQFull() const {
222-
return getLoadQueueSize() != 0 && LoadQueue.size() == getLoadQueueSize();
223-
}
396+
unsigned CurrentLoadGroupID;
397+
unsigned CurrentLoadBarrierGroupID;
398+
unsigned CurrentStoreGroupID;
224399

225400
public:
226401
LSUnit(const MCSchedModel &SM)
227402
: LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
228403
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
229404
: LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
230405
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
231-
: LSUnitBase(SM, LQ, SQ, AssumeNoAlias) {}
232-
233-
#ifndef NDEBUG
234-
void dump() const;
235-
#endif
406+
: LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
407+
CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
236408

237409
/// Returns LSU_AVAILABLE if there are enough load/store queue entries to
238410
/// accommodate instruction IR.
@@ -242,9 +414,6 @@ class LSUnit : public LSUnitBase {
242414
///
243415
/// This method assumes that a previous call to `isAvailable(IR)` succeeded
244416
/// returning LSU_AVAILABLE.
245-
void dispatch(const InstRef &IR) override;
246-
247-
/// Check if a peviously dispatched instruction IR is now ready for execution.
248417
///
249418
/// Rules are:
250419
/// By default, rules are:
@@ -254,19 +423,12 @@ class LSUnit : public LSUnitBase {
254423
/// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
255424
/// 5. A load has to wait until an older load barrier is fully executed.
256425
/// 6. A store has to wait until an older store barrier is fully executed.
257-
const InstRef &isReady(const InstRef &IR) const override;
426+
unsigned dispatch(const InstRef &IR) override;
258427

259-
/// Instruction executed event handler.
260-
///
261-
/// Load and store instructions are tracked by their corresponding queues from
262-
/// dispatch until "instruction executed" event.
263-
/// When a load instruction Ld reaches the 'Executed' stage, its value
264-
/// is propagated to all the dependent users, and the LS unit stops tracking
265-
/// Ld.
266-
/// FIXME: For simplicity, we optimistically assume a similar behavior for
267-
/// store instructions. In practice, store operations don't tend to leave the
268-
/// store queue until they reach the 'Retired' stage (See PR39830).
269-
void onInstructionExecuted(const InstRef &IR);
428+
// FIXME: For simplicity, we optimistically assume a similar behavior for
429+
// store instructions. In practice, store operations don't tend to leave the
430+
// store queue until they reach the 'Retired' stage (See PR39830).
431+
void onInstructionExecuted(const InstRef &IR) override;
270432
};
271433

272434
} // namespace mca

llvm/include/llvm/MCA/HardwareUnits/Scheduler.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,11 @@ class Scheduler : public HardwareUnit {
191191
/// Returns true if instruction IR is ready to be issued to the underlying
192192
/// pipelines. Note that this operation cannot fail; it assumes that a
193193
/// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
194-
bool dispatch(const InstRef &IR);
194+
///
195+
/// If IR is a memory operation, then the Scheduler queries the LS unit to
196+
/// obtain a LS token. An LS token is used internally to track memory
197+
/// dependencies.
198+
bool dispatch(InstRef &IR);
195199

196200
/// Issue an instruction and populates a vector of used pipeline resources,
197201
/// and a vector of instructions that transitioned to the ready state as a

0 commit comments

Comments
 (0)