Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 82d654f

Browse files
committed
[llvm-mca][TimelineView] Force the same number of executions for every entry in the 'wait-times' table.
This patch also uses colors to highlight problematic wait-time entries. A problematic entry is an entry with a high wait time that tends to match (or exceed) the size of the scheduler's buffer. Color RED is used if an instruction had to wait an average number of cycles which is bigger than (or equal to) the size of the underlying scheduler's buffer. Color YELLOW is used if the time (in cycles) spent waiting for the operands or pipeline resources is bigger than half the size of the underlying scheduler's buffer. Color MAGENTA is used if an instruction does not consume buffer resources according to the scheduling model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340825 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0977b80 commit 82d654f

File tree

5 files changed

+125
-89
lines changed

5 files changed

+125
-89
lines changed

test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,17 @@ vandps %xmm4, %xmm1, %xmm0
104104
# CHECK-NEXT: 1. 2 9.5 0.5 35.5 vaddps %xmm0, %xmm1, %xmm3
105105
# CHECK-NEXT: 2. 2 11.5 0.0 33.5 vaddps %ymm3, %ymm1, %ymm4
106106
# CHECK-NEXT: 3. 2 12.5 2.0 31.5 vaddps %ymm3, %ymm1, %ymm4
107-
# CHECK-NEXT: 4. 1 5.0 4.0 29.0 vaddps %ymm3, %ymm1, %ymm4
108-
# CHECK-NEXT: 5. 1 6.0 6.0 27.0 vaddps %ymm3, %ymm1, %ymm4
109-
# CHECK-NEXT: 6. 1 7.0 7.0 26.0 vaddps %ymm3, %ymm1, %ymm4
110-
# CHECK-NEXT: 7. 1 8.0 8.0 24.0 vaddps %ymm3, %ymm1, %ymm4
111-
# CHECK-NEXT: 8. 1 9.0 9.0 23.0 vaddps %ymm3, %ymm1, %ymm4
112-
# CHECK-NEXT: 9. 1 10.0 10.0 21.0 vaddps %ymm3, %ymm1, %ymm4
113-
# CHECK-NEXT: 10. 1 11.0 11.0 20.0 vaddps %ymm3, %ymm1, %ymm4
114-
# CHECK-NEXT: 11. 1 12.0 12.0 18.0 vaddps %ymm3, %ymm1, %ymm4
115-
# CHECK-NEXT: 12. 1 13.0 13.0 17.0 vaddps %ymm3, %ymm1, %ymm4
116-
# CHECK-NEXT: 13. 1 14.0 14.0 15.0 vaddps %ymm3, %ymm1, %ymm4
117-
# CHECK-NEXT: 14. 1 15.0 15.0 14.0 vaddps %ymm3, %ymm1, %ymm4
118-
# CHECK-NEXT: 15. 1 16.0 16.0 12.0 vaddps %ymm3, %ymm1, %ymm4
119-
# CHECK-NEXT: 16. 1 17.0 17.0 11.0 vaddps %ymm3, %ymm1, %ymm4
120-
# CHECK-NEXT: 17. 1 19.0 0.0 10.0 vandps %xmm4, %xmm1, %xmm0
107+
# CHECK-NEXT: 4. 2 13.5 4.0 30.5 vaddps %ymm3, %ymm1, %ymm4
108+
# CHECK-NEXT: 5. 2 14.5 6.0 28.5 vaddps %ymm3, %ymm1, %ymm4
109+
# CHECK-NEXT: 6. 2 15.5 7.5 27.5 vaddps %ymm3, %ymm1, %ymm4
110+
# CHECK-NEXT: 7. 2 16.5 9.0 25.5 vaddps %ymm3, %ymm1, %ymm4
111+
# CHECK-NEXT: 8. 2 17.5 10.5 24.5 vaddps %ymm3, %ymm1, %ymm4
112+
# CHECK-NEXT: 9. 2 18.5 12.0 22.5 vaddps %ymm3, %ymm1, %ymm4
113+
# CHECK-NEXT: 10. 2 19.5 13.5 21.5 vaddps %ymm3, %ymm1, %ymm4
114+
# CHECK-NEXT: 11. 2 20.5 15.0 19.5 vaddps %ymm3, %ymm1, %ymm4
115+
# CHECK-NEXT: 12. 2 21.5 16.5 18.5 vaddps %ymm3, %ymm1, %ymm4
116+
# CHECK-NEXT: 13. 2 22.5 18.0 16.5 vaddps %ymm3, %ymm1, %ymm4
117+
# CHECK-NEXT: 14. 2 23.5 19.5 15.5 vaddps %ymm3, %ymm1, %ymm4
118+
# CHECK-NEXT: 15. 2 21.0 21.0 13.5 vaddps %ymm3, %ymm1, %ymm4
119+
# CHECK-NEXT: 16. 2 22.0 22.0 12.5 vaddps %ymm3, %ymm1, %ymm4
120+
# CHECK-NEXT: 17. 2 24.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0

test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,6 @@ vsqrtps %ymm0, %ymm2
103103
# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2
104104
# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
105105
# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2
106-
# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2
107-
# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2
108-
# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2
106+
# CHECK-NEXT: 5. 2 29.5 29.5 0.0 vsqrtps %xmm0, %xmm2
107+
# CHECK-NEXT: 6. 2 1.0 1.0 45.5 vaddps %ymm0, %ymm1, %ymm2
108+
# CHECK-NEXT: 7. 2 48.5 48.5 0.0 vsqrtps %ymm0, %ymm2

test/tools/llvm-mca/X86/BtVer2/pr37790.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,5 @@ stmxcsr (%rsp)
3737
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
3838

3939
# CHECK: [0] [1] [2] [3]
40-
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 int3
41-
# CHECK-NEXT: 1. 1 101.0 0.0 0.0 stmxcsr (%rsp)
40+
# CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3
41+
# CHECK-NEXT: 1. 2 100.5 0.0 0.0 stmxcsr (%rsp)

tools/llvm-mca/Views/TimelineView.cpp

Lines changed: 100 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,63 @@ using namespace llvm;
1818

1919
namespace mca {
2020

21-
void TimelineView::initialize(unsigned MaxIterations) {
22-
unsigned NumInstructions =
23-
AsmSequence.getNumIterations() * AsmSequence.size();
21+
TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
22+
const SourceMgr &S, unsigned MaxIterations,
23+
unsigned Cycles)
24+
: STI(sti), MCIP(Printer), AsmSequence(S), CurrentCycle(0),
25+
MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
26+
UsedBuffer(S.size()) {
27+
unsigned NumInstructions = AsmSequence.size();
2428
if (!MaxIterations)
2529
MaxIterations = DEFAULT_ITERATIONS;
26-
unsigned NumEntries =
27-
std::min(NumInstructions, MaxIterations * AsmSequence.size());
28-
Timeline.resize(NumEntries);
29-
TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0};
30-
std::fill(Timeline.begin(), Timeline.end(), NullTVEntry);
31-
32-
WaitTime.resize(AsmSequence.size());
33-
WaitTimeEntry NullWTEntry = {0, 0, 0, 0};
30+
NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations());
31+
Timeline.resize(NumInstructions);
32+
33+
WaitTimeEntry NullWTEntry = {0, 0, 0};
3434
std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
3535
}
3636

37+
void TimelineView::onReservedBuffers(const InstRef &IR,
38+
ArrayRef<unsigned> Buffers) {
39+
if (IR.getSourceIndex() >= AsmSequence.size())
40+
return;
41+
42+
const MCSchedModel &SM = STI.getSchedModel();
43+
std::pair<unsigned, unsigned> BufferInfo = {0, 0};
44+
for (const unsigned Buffer : Buffers) {
45+
const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
46+
if (MCDesc.BufferSize <= 0)
47+
continue;
48+
unsigned OtherSize = static_cast<unsigned>(MCDesc.BufferSize);
49+
if (!BufferInfo.first || BufferInfo.second > OtherSize) {
50+
BufferInfo.first = Buffer;
51+
BufferInfo.second = OtherSize;
52+
}
53+
}
54+
55+
UsedBuffer[IR.getSourceIndex()] = BufferInfo;
56+
}
57+
3758
void TimelineView::onEvent(const HWInstructionEvent &Event) {
3859
const unsigned Index = Event.IR.getSourceIndex();
39-
if (CurrentCycle >= MaxCycle || Index >= Timeline.size())
60+
if (Index >= Timeline.size())
4061
return;
62+
4163
switch (Event.Type) {
4264
case HWInstructionEvent::Retired: {
4365
TimelineViewEntry &TVEntry = Timeline[Index];
44-
TVEntry.CycleRetired = CurrentCycle;
66+
if (CurrentCycle < MaxCycle)
67+
TVEntry.CycleRetired = CurrentCycle;
4568

4669
// Update the WaitTime entry which corresponds to this Index.
4770
WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
48-
WTEntry.Executions++;
4971
WTEntry.CyclesSpentInSchedulerQueue +=
5072
TVEntry.CycleIssued - TVEntry.CycleDispatched;
5173
assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
5274
WTEntry.CyclesSpentInSQWhileReady +=
5375
TVEntry.CycleIssued - TVEntry.CycleReady;
5476
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
55-
(TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted;
77+
(CurrentCycle - 1) - TVEntry.CycleExecuted;
5678
break;
5779
}
5880
case HWInstructionEvent::Ready:
@@ -70,57 +92,83 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
7092
default:
7193
return;
7294
}
73-
LastCycle = std::max(LastCycle, CurrentCycle);
95+
if (CurrentCycle < MaxCycle)
96+
LastCycle = std::max(LastCycle, CurrentCycle);
97+
}
98+
99+
static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
100+
unsigned Executions,
101+
unsigned BufferSize) {
102+
if (CumulativeCycles && BufferSize == 0)
103+
return raw_ostream::MAGENTA;
104+
if (CumulativeCycles >= (BufferSize * Executions))
105+
return raw_ostream::RED;
106+
if ((CumulativeCycles * 2) >= (BufferSize * Executions))
107+
return raw_ostream::YELLOW;
108+
return raw_ostream::SAVEDCOLOR;
109+
}
110+
111+
static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
112+
unsigned Executions, unsigned BufferSize) {
113+
if (!OS.has_colors())
114+
return;
115+
116+
raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
117+
if (Color == raw_ostream::SAVEDCOLOR) {
118+
OS.resetColor();
119+
return;
120+
}
121+
OS.changeColor(Color, /* bold */ true, /* BG */ false);
74122
}
75123

76124
void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
77125
const WaitTimeEntry &Entry,
78-
unsigned SourceIndex) const {
126+
unsigned SourceIndex,
127+
unsigned Executions) const {
79128
OS << SourceIndex << '.';
80129
OS.PadToColumn(7);
81130

82-
if (Entry.Executions == 0) {
83-
OS << "- - - - ";
84-
} else {
85-
double AverageTime1, AverageTime2, AverageTime3;
86-
unsigned Executions = Entry.Executions;
87-
AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
88-
AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
89-
AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
90-
91-
OS << Executions;
92-
OS.PadToColumn(13);
93-
94-
OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
95-
OS.PadToColumn(20);
96-
OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
97-
OS.PadToColumn(27);
98-
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
99-
OS.PadToColumn(34);
100-
}
101-
}
131+
double AverageTime1, AverageTime2, AverageTime3;
132+
AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
133+
AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
134+
AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
102135

103-
void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
104-
if (WaitTime.empty())
105-
return;
136+
OS << Executions;
137+
OS.PadToColumn(13);
138+
unsigned BufferSize = UsedBuffer[SourceIndex].second;
139+
tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
140+
OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
141+
OS.PadToColumn(20);
142+
tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
143+
OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
144+
OS.PadToColumn(27);
145+
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
146+
STI.getSchedModel().MicroOpBufferSize);
147+
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
106148

107-
std::string Buffer;
108-
raw_string_ostream TempStream(Buffer);
109-
formatted_raw_ostream FOS(TempStream);
149+
if (OS.has_colors())
150+
OS.resetColor();
151+
OS.PadToColumn(34);
152+
}
110153

111-
FOS << "\n\nAverage Wait times (based on the timeline view):\n"
112-
<< "[0]: Executions\n"
113-
<< "[1]: Average time spent waiting in a scheduler's queue\n"
114-
<< "[2]: Average time spent waiting in a scheduler's queue while ready\n"
115-
<< "[3]: Average time elapsed from WB until retire stage\n\n";
116-
FOS << " [0] [1] [2] [3]\n";
154+
void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
155+
std::string Header =
156+
"\n\nAverage Wait times (based on the timeline view):\n"
157+
"[0]: Executions\n"
158+
"[1]: Average time spent waiting in a scheduler's queue\n"
159+
"[2]: Average time spent waiting in a scheduler's queue while ready\n"
160+
"[3]: Average time elapsed from WB until retire stage\n\n"
161+
" [0] [1] [2] [3]\n";
162+
OS << Header;
117163

118-
// Use a different string stream for the instruction.
164+
// Use a different string stream for printing instructions.
119165
std::string Instruction;
120166
raw_string_ostream InstrStream(Instruction);
121167

168+
formatted_raw_ostream FOS(OS);
169+
unsigned Executions = Timeline.size() / AsmSequence.size();
122170
for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) {
123-
printWaitTimeEntry(FOS, WaitTime[I], I);
171+
printWaitTimeEntry(FOS, WaitTime[I], I, Executions);
124172
// Append the instruction info at the end of the line.
125173
const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
126174

@@ -133,9 +181,6 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
133181
FOS << " " << Str << '\n';
134182
FOS.flush();
135183
Instruction = "";
136-
137-
OS << Buffer;
138-
Buffer = "";
139184
}
140185
}
141186

@@ -202,20 +247,15 @@ static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
202247
}
203248

204249
void TimelineView::printTimeline(raw_ostream &OS) const {
205-
std::string Buffer;
206-
raw_string_ostream StringStream(Buffer);
207-
formatted_raw_ostream FOS(StringStream);
208-
250+
formatted_raw_ostream FOS(OS);
209251
printTimelineHeader(FOS, LastCycle);
210252
FOS.flush();
211-
OS << Buffer;
212253

213254
// Use a different string stream for the instruction.
214255
std::string Instruction;
215256
raw_string_ostream InstrStream(Instruction);
216257

217258
for (unsigned I = 0, E = Timeline.size(); I < E; ++I) {
218-
Buffer = "";
219259
const TimelineViewEntry &Entry = Timeline[I];
220260
if (Entry.CycleRetired == 0)
221261
return;
@@ -234,7 +274,6 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
234274
FOS << " " << Str << '\n';
235275
FOS.flush();
236276
Instruction = "";
237-
OS << Buffer;
238277
}
239278
}
240279
} // namespace mca

tools/llvm-mca/Views/TimelineView.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,23 +135,22 @@ class TimelineView : public View {
135135
std::vector<TimelineViewEntry> Timeline;
136136

137137
struct WaitTimeEntry {
138-
unsigned Executions;
139138
unsigned CyclesSpentInSchedulerQueue;
140139
unsigned CyclesSpentInSQWhileReady;
141140
unsigned CyclesSpentAfterWBAndBeforeRetire;
142141
};
143142
std::vector<WaitTimeEntry> WaitTime;
143+
std::vector<std::pair<unsigned, unsigned>> UsedBuffer;
144144

145145
void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
146146
const TimelineViewEntry &E, unsigned Iteration,
147147
unsigned SourceIndex) const;
148148
void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
149-
const WaitTimeEntry &E, unsigned Index) const;
149+
const WaitTimeEntry &E, unsigned Index,
150+
unsigned Executions) const;
150151

151152
const unsigned DEFAULT_ITERATIONS = 10;
152153

153-
void initialize(unsigned MaxIterations);
154-
155154
// Display characters for the TimelineView report output.
156155
struct DisplayChar {
157156
static const char Dispatched = 'D';
@@ -165,15 +164,13 @@ class TimelineView : public View {
165164
public:
166165
TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
167166
const SourceMgr &Sequence, unsigned MaxIterations,
168-
unsigned Cycles)
169-
: STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0),
170-
MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) {
171-
initialize(MaxIterations);
172-
}
167+
unsigned Cycles);
173168

174169
// Event handlers.
175170
void onCycleEnd() override { ++CurrentCycle; }
176171
void onEvent(const HWInstructionEvent &Event) override;
172+
void onReservedBuffers(const InstRef &IR,
173+
llvm::ArrayRef<unsigned> Buffers) override;
177174

178175
// print functionalities.
179176
void printTimeline(llvm::raw_ostream &OS) const;

0 commit comments

Comments
 (0)