#ifndef LLVM_MCA_LSUNIT_H
#define LLVM_MCA_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"
@@ -25,6 +26,143 @@ namespace mca {
25
26
26
27
class Scheduler ;
27
28
29
+ // / A node of a memory dependency graph. A MemoryGroup describes a set of
30
+ // / instructions with same memory dependencies.
31
+ // /
32
+ // / By construction, instructions of a MemoryGroup don't depend on each other.
33
+ // / At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
34
+ // / A Memory group identifier is then stored as a "token" in field
35
+ // / Instruction::LSUTokenID of each dispatched instructions. That token is used
36
+ // / internally by the LSUnit to track memory dependencies.
37
+ class MemoryGroup {
38
+ unsigned NumPredecessors;
39
+ unsigned NumExecutingPredecessors;
40
+ unsigned NumExecutedPredecessors;
41
+
42
+ unsigned NumInstructions;
43
+ unsigned NumExecuting;
44
+ unsigned NumExecuted;
45
+ SmallVector<MemoryGroup *, 4 > Succ;
46
+
47
+ CriticalDependency CriticalPredecessor;
48
+ InstRef CriticalMemoryInstruction;
49
+
50
+ MemoryGroup (const MemoryGroup &) = delete ;
51
+ MemoryGroup &operator =(const MemoryGroup &) = delete ;
52
+
53
+ public:
54
+ MemoryGroup ()
55
+ : NumPredecessors(0 ), NumExecutingPredecessors(0 ),
56
+ NumExecutedPredecessors (0 ), NumInstructions(0 ), NumExecuting(0 ),
57
+ NumExecuted(0 ), CriticalPredecessor(), CriticalMemoryInstruction() {}
58
+ MemoryGroup (MemoryGroup &&) = default;
59
+
60
+ ArrayRef<MemoryGroup *> getSuccessors () const { return Succ; }
61
+ unsigned getNumSuccessors () const { return Succ.size (); }
62
+ unsigned getNumPredecessors () const { return NumPredecessors; }
63
+ unsigned getNumExecutingPredecessors () const {
64
+ return NumExecutingPredecessors;
65
+ }
66
+ unsigned getNumExecutedPredecessors () const {
67
+ return NumExecutedPredecessors;
68
+ }
69
+ unsigned getNumInstructions () const { return NumInstructions; }
70
+ unsigned getNumExecuting () const { return NumExecuting; }
71
+ unsigned getNumExecuted () const { return NumExecuted; }
72
+
73
+ const InstRef &getCriticalMemoryInstruction () const {
74
+ return CriticalMemoryInstruction;
75
+ }
76
+ const CriticalDependency &getCriticalPredecessor () const {
77
+ return CriticalPredecessor;
78
+ }
79
+
80
+ void addSuccessor (MemoryGroup *Group) {
81
+ Group->NumPredecessors ++;
82
+ assert (!isExecuted () && " Should have been removed!" );
83
+ if (isExecuting ())
84
+ Group->onGroupIssued (CriticalMemoryInstruction);
85
+ Succ.emplace_back (Group);
86
+ }
87
+
88
+ bool isWaiting () const {
89
+ return NumPredecessors >
90
+ (NumExecutingPredecessors + NumExecutedPredecessors);
91
+ }
92
+ bool isPending () const {
93
+ return NumExecutingPredecessors &&
94
+ ((NumExecutedPredecessors + NumExecutingPredecessors) ==
95
+ NumPredecessors);
96
+ }
97
+ bool isReady () const { return NumExecutedPredecessors == NumPredecessors; }
98
+ bool isExecuting () const {
99
+ return NumExecuting == NumInstructions - NumExecuted;
100
+ }
101
+ bool isExecuted () const { return NumInstructions == NumExecuted; }
102
+
103
+ void onGroupIssued (const InstRef &IR) {
104
+ assert (!isReady () && " Unexpected group-start event!" );
105
+ NumExecutingPredecessors++;
106
+
107
+ unsigned Cycles = IR.getInstruction ()->getCyclesLeft ();
108
+ if (CriticalPredecessor.Cycles < Cycles) {
109
+ CriticalPredecessor.IID = IR.getSourceIndex ();
110
+ CriticalPredecessor.Cycles = Cycles;
111
+ }
112
+ }
113
+
114
+ void onGroupExecuted () {
115
+ assert (!isReady () && " Inconsistent state found!" );
116
+ NumExecutingPredecessors--;
117
+ NumExecutedPredecessors++;
118
+ }
119
+
120
+ void onInstructionIssued (const InstRef &IR) {
121
+ assert (!isExecuting () && " Invalid internal state!" );
122
+ ++NumExecuting;
123
+
124
+ // update the CriticalMemDep.
125
+ const Instruction &IS = *IR.getInstruction ();
126
+ if ((bool )CriticalMemoryInstruction) {
127
+ const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction ();
128
+ if (OtherIS.getCyclesLeft () < IS.getCyclesLeft ())
129
+ CriticalMemoryInstruction = IR;
130
+ } else {
131
+ CriticalMemoryInstruction = IR;
132
+ }
133
+
134
+ if (!isExecuting ())
135
+ return ;
136
+
137
+ // Notify successors that this group started execution.
138
+ for (MemoryGroup *MG : Succ)
139
+ MG->onGroupIssued (CriticalMemoryInstruction);
140
+ }
141
+
142
+ void onInstructionExecuted () {
143
+ assert (isReady () && !isExecuted () && " Invalid internal state!" );
144
+ --NumExecuting;
145
+ ++NumExecuted;
146
+
147
+ if (!isExecuted ())
148
+ return ;
149
+
150
+ // Notify successors that this group has finished execution.
151
+ for (MemoryGroup *MG : Succ)
152
+ MG->onGroupExecuted ();
153
+ }
154
+
155
+ void addInstruction () {
156
+ assert (!getNumSuccessors () && " Cannot add instructions to this group!" );
157
+ ++NumInstructions;
158
+ }
159
+
160
+ void cycleEvent () {
161
+ if (CriticalPredecessor.Cycles )
162
+ CriticalPredecessor.Cycles --;
163
+ }
164
+ };
165
+
28
166
// / Abstract base interface for LS (load/store) units in llvm-mca.
29
167
class LSUnitBase : public HardwareUnit {
30
168
// / Load queue size.
@@ -43,13 +181,20 @@ class LSUnitBase : public HardwareUnit {
43
181
// / llvm/Target/TargetSchedule.td).
44
182
unsigned SQSize;
45
183
184
+ unsigned UsedLQEntries;
185
+ unsigned UsedSQEntries;
186
+
46
187
// / True if loads don't alias with stores.
47
188
// /
48
189
// / By default, the LS unit assumes that loads and stores don't alias with
49
190
// / eachother. If this field is set to false, then loads are always assumed to
50
191
// / alias with stores.
51
192
const bool NoAlias;
52
193
194
+ // / Used to map group identifiers to MemoryGroups.
195
+ DenseMap<unsigned , std::unique_ptr<MemoryGroup>> Groups;
196
+ unsigned NextGroupID;
197
+
53
198
public:
54
199
LSUnitBase (const MCSchedModel &SM, unsigned LoadQueueSize,
55
200
unsigned StoreQueueSize, bool AssumeNoAlias);
@@ -62,6 +207,11 @@ class LSUnitBase : public HardwareUnit {
62
207
// / Returns the total number of entries in the store queue.
63
208
unsigned getStoreQueueSize () const { return SQSize; }
64
209
210
+ unsigned getUsedLQEntries () const { return UsedLQEntries; }
211
+ unsigned getUsedSQEntries () const { return UsedSQEntries; }
212
+ unsigned assignLQSlot () { return UsedLQEntries++; }
213
+ unsigned assignSQSlot () { return UsedSQEntries++; }
214
+
65
215
bool assumeNoAlias () const { return NoAlias; }
66
216
67
217
enum Status {
@@ -81,25 +231,71 @@ class LSUnitBase : public HardwareUnit {
81
231
// /
82
232
// / This method assumes that a previous call to `isAvailable(IR)` succeeded
83
233
// / with a LSUnitBase::Status value of LSU_AVAILABLE.
84
- virtual void dispatch (const InstRef &IR) = 0;
234
+ // / Returns the GroupID associated with this instruction. That value will be
235
+ // / used to set the LSUTokenID field in class Instruction.
236
+ virtual unsigned dispatch (const InstRef &IR) = 0;
237
+
238
+ bool isSQEmpty () const { return !UsedSQEntries; }
239
+ bool isLQEmpty () const { return !UsedLQEntries; }
240
+ bool isSQFull () const { return SQSize && SQSize == UsedSQEntries; }
241
+ bool isLQFull () const { return LQSize && LQSize == UsedLQEntries; }
242
+
243
+ bool isValidGroupID (unsigned Index) const {
244
+ return Index && (Groups.find (Index) != Groups.end ());
245
+ }
85
246
86
247
// / Check if a peviously dispatched instruction IR is now ready for execution.
87
- // /
88
- // / Instruction IR is assumed to be a memory operation. If IR is still waiting
89
- // / on another memory instruction M, then M is returned to the caller. If IR
90
- // / depends on more than one memory operations, then this method returns one
91
- // / of them.
92
- // /
93
- // / Derived classes can implement memory consistency rules for simulated
94
- // / processor within this member function.
95
- virtual const InstRef &isReady (const InstRef &IR) const = 0;
248
+ bool isReady (const InstRef &IR) const {
249
+ unsigned GroupID = IR.getInstruction ()->getLSUTokenID ();
250
+ assert (isValidGroupID (GroupID) &&
251
+ " Invalid group associated with this instruction!" );
252
+ const MemoryGroup &Group = *Groups.find (GroupID)->second ;
253
+ return Group.isReady ();
254
+ }
255
+
256
+ // / Check if a previously dispatched instruction IR only depends on
257
+ // / instructions that are currently executing.
258
+ bool isPending (const InstRef &IR) const {
259
+ unsigned GroupID = IR.getInstruction ()->getLSUTokenID ();
260
+ assert (isValidGroupID (GroupID) &&
261
+ " Invalid group associated with this instruction!" );
262
+ const MemoryGroup &Group = *Groups.find (GroupID)->second ;
263
+ return Group.isPending ();
264
+ }
265
+
266
+ const MemoryGroup &getGroup (unsigned Index) const {
267
+ assert (isValidGroupID (Index) && " Group doesn't exist!" );
268
+ return *Groups.find (Index)->second ;
269
+ }
270
+
271
+ MemoryGroup &getGroup (unsigned Index) {
272
+ assert (isValidGroupID (Index) && " Group doesn't exist!" );
273
+ return *Groups.find (Index)->second ;
274
+ }
275
+
276
+ unsigned createMemoryGroup () {
277
+ Groups.insert (std::make_pair (NextGroupID, llvm::make_unique<MemoryGroup>()));
278
+ return NextGroupID++;
279
+ }
280
+
281
+ // Instruction executed event handlers.
282
+ virtual void onInstructionExecuted (const InstRef &IR);
283
+
284
+ virtual void onInstructionIssued (const InstRef &IR) {
285
+ unsigned GroupID = IR.getInstruction ()->getLSUTokenID ();
286
+ Groups[GroupID]->onInstructionIssued (IR);
287
+ }
288
+
289
+ virtual void cycleEvent ();
290
+
291
+ #ifndef NDEBUG
292
+ void dump () const ;
293
+ #endif
96
294
};
97
295
98
- // / A Load/Store Unit implementing a load and store queues .
296
+ // / Default Load/Store Unit (LS Unit) for simulated processors .
99
297
// /
100
- // / This class implements a load queue and a store queue to emulate the
101
- // / out-of-order execution of memory operations.
102
- // / Each load (or store) consumes an entry in the load (or store) queue.
298
+ // / Each load (or store) consumes one entry in the load (or store) queue.
103
299
// /
104
300
// / Rules are:
105
301
// / 1) A younger load is allowed to pass an older load only if there are no
@@ -159,14 +355,6 @@ class LSUnitBase : public HardwareUnit {
159
355
// / the load/store queue(s). That also means, all the older loads/stores have
160
356
// / already been executed.
161
357
class LSUnit : public LSUnitBase {
162
- // When a `MayLoad` instruction is dispatched to the schedulers for execution,
163
- // the LSUnit reserves an entry in the `LoadQueue` for it.
164
- //
165
- // LoadQueue keeps track of all the loads that are in-flight. A load
166
- // instruction is eventually removed from the LoadQueue when it reaches
167
- // completion stage. That means, a load leaves the queue whe it is 'executed',
168
- // and its value can be forwarded on the data path to outside units.
169
- //
170
358
// This class doesn't know about the latency of a load instruction. So, it
171
359
// conservatively/pessimistically assumes that the latency of a load opcode
172
360
// matches the instruction latency.
@@ -197,42 +385,26 @@ class LSUnit : public LSUnitBase {
197
385
// alternative approaches that let instructions specify the number of
198
386
// load/store queue entries which they consume at dispatch stage (See
199
387
// PR39830).
200
- SmallSet<InstRef, 16 > LoadQueue;
201
- SmallSet<InstRef, 16 > StoreQueue;
202
-
203
- void assignLQSlot (const InstRef &IR);
204
- void assignSQSlot (const InstRef &IR);
205
-
388
+ //
206
389
// An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
207
390
// conservatively treated as a store barrier. It forces older store to be
208
391
// executed before newer stores are issued.
209
- SmallSet<InstRef, 8 > StoreBarriers;
210
-
392
+ //
211
393
// An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
212
394
// conservatively treated as a load barrier. It forces older loads to execute
213
395
// before newer loads are issued.
214
- SmallSet<InstRef, 8 > LoadBarriers;
215
-
216
- bool isSQEmpty () const { return StoreQueue.empty (); }
217
- bool isLQEmpty () const { return LoadQueue.empty (); }
218
- bool isSQFull () const {
219
- return getStoreQueueSize () != 0 && StoreQueue.size () == getStoreQueueSize ();
220
- }
221
- bool isLQFull () const {
222
- return getLoadQueueSize () != 0 && LoadQueue.size () == getLoadQueueSize ();
223
- }
396
+ unsigned CurrentLoadGroupID;
397
+ unsigned CurrentLoadBarrierGroupID;
398
+ unsigned CurrentStoreGroupID;
224
399
225
400
public:
226
401
LSUnit (const MCSchedModel &SM)
227
402
: LSUnit(SM, /* LQSize */ 0 , /* SQSize */ 0 , /* NoAlias */ false ) {}
228
403
LSUnit (const MCSchedModel &SM, unsigned LQ, unsigned SQ)
229
404
: LSUnit(SM, LQ, SQ, /* NoAlias */ false ) {}
230
405
LSUnit (const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
231
- : LSUnitBase(SM, LQ, SQ, AssumeNoAlias) {}
232
-
233
- #ifndef NDEBUG
234
- void dump () const ;
235
- #endif
406
+ : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0 ),
407
+ CurrentLoadBarrierGroupID (0 ), CurrentStoreGroupID(0 ) {}
236
408
237
409
// / Returns LSU_AVAILABLE if there are enough load/store queue entries to
238
410
// / accomodate instruction IR.
@@ -242,9 +414,6 @@ class LSUnit : public LSUnitBase {
242
414
// /
243
415
// / This method assumes that a previous call to `isAvailable(IR)` succeeded
244
416
// / returning LSU_AVAILABLE.
245
- void dispatch (const InstRef &IR) override ;
246
-
247
- // / Check if a peviously dispatched instruction IR is now ready for execution.
248
417
// /
249
418
// / Rules are:
250
419
// / By default, rules are:
@@ -254,19 +423,12 @@ class LSUnit : public LSUnitBase {
254
423
// / 4. A store may not pass a previous load (regardless of flag 'NoAlias').
255
424
// / 5. A load has to wait until an older load barrier is fully executed.
256
425
// / 6. A store has to wait until an older store barrier is fully executed.
257
- const InstRef & isReady (const InstRef &IR) const override ;
426
+ unsigned dispatch (const InstRef &IR) override ;
258
427
259
- // / Instruction executed event handler.
260
- // /
261
- // / Load and store instructions are tracked by their corresponding queues from
262
- // / dispatch until "instruction executed" event.
263
- // / When a load instruction Ld reaches the 'Executed' stage, its value
264
- // / is propagated to all the dependent users, and the LS unit stops tracking
265
- // / Ld.
266
- // / FIXME: For simplicity, we optimistically assume a similar behavior for
267
- // / store instructions. In practice, store operations don't tend to leave the
268
- // / store queue until they reach the 'Retired' stage (See PR39830).
269
- void onInstructionExecuted (const InstRef &IR);
428
+ // FIXME: For simplicity, we optimistically assume a similar behavior for
429
+ // store instructions. In practice, store operations don't tend to leave the
430
+ // store queue until they reach the 'Retired' stage (See PR39830).
431
+ void onInstructionExecuted (const InstRef &IR) override ;
270
432
};
271
433
272
434
} // namespace mca