Skip to content

Commit bcff47d

Browse files
mtrofintmsri
authored andcommitted
[ctx_prof] Fix checks in PGOCtxprofFlattening (llvm#108467)
The assertion that all out-edges of a BB can't be 0 is incorrect: they can be, if that branch is on a cold subgraph. Added validators and asserts about the expected proprerties of the propagated counters.
1 parent 509e390 commit bcff47d

File tree

3 files changed

+148
-26
lines changed

3 files changed

+148
-26
lines changed

llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp

Lines changed: 86 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/IR/Analysis.h"
2727
#include "llvm/IR/CFG.h"
2828
#include "llvm/IR/Dominators.h"
29+
#include "llvm/IR/Instructions.h"
2930
#include "llvm/IR/IntrinsicInst.h"
3031
#include "llvm/IR/Module.h"
3132
#include "llvm/IR/PassManager.h"
@@ -34,6 +35,7 @@
3435
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
3536
#include "llvm/Transforms/Scalar/DCE.h"
3637
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38+
#include <deque>
3739

3840
using namespace llvm;
3941

@@ -51,29 +53,42 @@ class ProfileAnnotator final {
5153

5254
class BBInfo {
5355
std::optional<uint64_t> Count;
56+
// OutEdges is dimensioned to match the number of terminator operands.
57+
// Entries in the vector match the index in the terminator operand list. In
58+
// some cases - see `shouldExcludeEdge` and its implementation - an entry
59+
// will be nullptr.
60+
// InEdges doesn't have the above constraint.
5461
SmallVector<EdgeInfo *> OutEdges;
5562
SmallVector<EdgeInfo *> InEdges;
5663
size_t UnknownCountOutEdges = 0;
5764
size_t UnknownCountInEdges = 0;
5865

5966
// Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
6067
// because all the edge counters must be known.
61-
uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
62-
bool AssumeAllKnown) const {
63-
uint64_t Sum = 0;
64-
for (const auto *E : Edges)
65-
if (E)
66-
Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U);
68+
// Return std::nullopt if there were no edges to sum. The user can decide
69+
// how to interpret that.
70+
std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
71+
bool AssumeAllKnown) const {
72+
std::optional<uint64_t> Sum;
73+
for (const auto *E : Edges) {
74+
// `Edges` may be `OutEdges`, case in which `E` could be nullptr.
75+
if (E) {
76+
if (!Sum.has_value())
77+
Sum = 0;
78+
*Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
79+
}
80+
}
6781
return Sum;
6882
}
6983

70-
void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
84+
bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
7185
assert(!Count.has_value());
7286
Count = getEdgeSum(Edges, true);
87+
return Count.has_value();
7388
}
7489

7590
void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
76-
uint64_t KnownSum = getEdgeSum(Edges, false);
91+
uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
7792
uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
7893
EdgeInfo *E = nullptr;
7994
for (auto *I : Edges)
@@ -110,17 +125,15 @@ class ProfileAnnotator final {
110125
}
111126

112127
bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
113-
if (!succ_empty(&BB) && !UnknownCountOutEdges) {
114-
computeCountFrom(OutEdges);
115-
return true;
128+
if (!UnknownCountOutEdges) {
129+
return computeCountFrom(OutEdges);
116130
}
117131
return false;
118132
}
119133

120134
bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
121-
if (!BB.isEntryBlock() && !UnknownCountInEdges) {
122-
computeCountFrom(InEdges);
123-
return true;
135+
if (!UnknownCountInEdges) {
136+
return computeCountFrom(InEdges);
124137
}
125138
return false;
126139
}
@@ -178,7 +191,7 @@ class ProfileAnnotator final {
178191
bool KeepGoing = true;
179192
while (KeepGoing) {
180193
KeepGoing = false;
181-
for (const auto &BB : reverse(F)) {
194+
for (const auto &BB : F) {
182195
auto &Info = getBBInfo(BB);
183196
if (!Info.hasCount())
184197
KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
@@ -198,6 +211,52 @@ class ProfileAnnotator final {
198211

199212
BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
200213

214+
const BBInfo &getBBInfo(const BasicBlock &BB) const {
215+
return BBInfos.find(&BB)->second;
216+
}
217+
218+
// validation function after we propagate the counters: all BBs and edges'
219+
// counters must have a value.
220+
bool allCountersAreAssigned() const {
221+
for (const auto &BBInfo : BBInfos)
222+
if (!BBInfo.second.hasCount())
223+
return false;
224+
for (const auto &EdgeInfo : EdgeInfos)
225+
if (!EdgeInfo.Count.has_value())
226+
return false;
227+
return true;
228+
}
229+
230+
/// Check that all paths from the entry basic block that use edges with
231+
/// non-zero counts arrive at a basic block with no successors (i.e. "exit")
232+
bool allTakenPathsExit() const {
233+
std::deque<const BasicBlock *> Worklist;
234+
DenseSet<const BasicBlock *> Visited;
235+
Worklist.push_back(&F.getEntryBlock());
236+
Visited.insert(&F.getEntryBlock());
237+
while (!Worklist.empty()) {
238+
const auto *BB = Worklist.front();
239+
Worklist.pop_front();
240+
if (succ_size(BB) <= 1)
241+
continue;
242+
const auto &BBInfo = getBBInfo(*BB);
243+
bool Inserted = false;
244+
for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
245+
const auto *Succ = BB->getTerminator()->getSuccessor(I);
246+
if (!shouldExcludeEdge(*BB, *Succ)) {
247+
if (BBInfo.getEdgeCount(I) > 0)
248+
if (Visited.insert(Succ).second) {
249+
Worklist.push_back(Succ);
250+
Inserted = true;
251+
}
252+
}
253+
}
254+
if (!Inserted)
255+
return false;
256+
}
257+
return true;
258+
}
259+
201260
public:
202261
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
203262
InstrProfSummaryBuilder &PB)
@@ -216,6 +275,9 @@ class ProfileAnnotator final {
216275
"profile is managed by IPO transforms");
217276
(void)Index;
218277
Count = Counters[Ins->getIndex()->getZExtValue()];
278+
} else if (isa<UnreachableInst>(BB.getTerminator())) {
279+
// The program presumably didn't crash.
280+
Count = 0;
219281
}
220282
auto [It, Ins] =
221283
BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
@@ -268,14 +330,16 @@ class ProfileAnnotator final {
268330
PB.addInternalCount(EdgeCount);
269331
}
270332

271-
if (MaxCount == 0)
272-
F.getContext().emitError(
273-
"[ctx-prof] Encountered a BB with more than one successor, where "
274-
"all outgoing edges have a 0 count. This occurs in non-exiting "
275-
"functions (message pumps, usually) which are not supported in the "
276-
"contextual profiling case");
277-
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
333+
if (MaxCount != 0)
334+
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
278335
}
336+
assert(allCountersAreAssigned() &&
337+
"Expected all counters have been assigned.");
338+
assert(allTakenPathsExit() &&
339+
"[ctx-prof] Encountered a BB with more than one successor, where "
340+
"all outgoing edges have a 0 count. This occurs in non-exiting "
341+
"functions (message pumps, usually) which are not supported in the "
342+
"contextual profiling case");
279343
}
280344
};
281345

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
2+
; RUN: split-file %s %t
3+
; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
4+
; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
5+
6+
; CHECK-LABEL: entry:
7+
; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
8+
; CHECK-LABEL: no:
9+
; CHECK-NOT: !prof
10+
; CHECK-LABEL: no1:
11+
; CHECK-NOT: !prof
12+
; CHECK-LABEL: no2:
13+
; CHECK-NOT: !prof
14+
; CHECK-LABEL: yes:
15+
; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
16+
; CHECK-NOT: !prof
17+
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
18+
19+
;--- example.ll
20+
define void @f1(i32 %cond) !guid !0 {
21+
entry:
22+
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)
23+
%t = icmp eq i32 %cond, 1
24+
br i1 %t, label %yes, label %no
25+
26+
no:
27+
%t2 = icmp eq i32 %cond, 2
28+
br i1 %t2, label %no1, label %no2
29+
no1:
30+
unreachable
31+
no2:
32+
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 1)
33+
unreachable
34+
yes:
35+
%t3 = icmp eq i32 %cond, 3
36+
br i1 %t3, label %yes1, label %yes2
37+
yes1:
38+
br label %exit
39+
yes2:
40+
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 2)
41+
%t4 = icmp eq i32 %cond, 4
42+
br i1 %t4, label %yes3, label %yes4
43+
yes3:
44+
br label %exit
45+
yes4:
46+
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 3)
47+
unreachable
48+
exit:
49+
ret void
50+
}
51+
52+
!0 = !{i64 1234}
53+
54+
;--- profile.json
55+
[{"Guid":1234, "Counters":[6,0,0,0]}]

llvm/test/ThinLTO/X86/ctxprof.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,21 @@
4747
; NOPROF-1-NOT: m2_f1()
4848
; NOPROF-2-NOT: m1_f1()
4949
;
50-
; The run with workload definitions - same other options.
50+
; The run with workload definitions - same other options. We do need to re-generate the .bc
51+
; files, to include instrumentation.
52+
; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m1.ll -o %t/m1-instr.bc
53+
; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m2.ll -o %t/m2-instr.bc
5154
;
5255
; RUN: echo '[ \
5356
; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
5457
; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
5558
; RUN: ]' > %t_exp/ctxprof.json
5659
; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
57-
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
60+
; RUN: llvm-lto2 run %t/m1-instr.bc %t/m2-instr.bc \
5861
; RUN: -o %t_exp/result.o -save-temps \
5962
; RUN: -use-ctx-profile=%t_exp/ctxprof.bitstream \
60-
; RUN: -r %t/m1.bc,m1_f1,plx \
61-
; RUN: -r %t/m2.bc,m2_f1,plx
63+
; RUN: -r %t/m1-instr.bc,m1_f1,plx \
64+
; RUN: -r %t/m2-instr.bc,m2_f1,plx
6265
; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
6366
; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
6467
;

0 commit comments

Comments
 (0)