-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ctx_prof] Fix checks in PGOCtxprofFlattening
#108467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e70420e
af63b6e
02735cf
b44c43d
a2884f6
9bb2b79
442309e
87829be
d791d87
9c9713c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
#include "llvm/IR/Analysis.h" | ||
#include "llvm/IR/CFG.h" | ||
#include "llvm/IR/Dominators.h" | ||
#include "llvm/IR/Instructions.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/IR/PassManager.h" | ||
|
@@ -34,6 +35,7 @@ | |
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" | ||
#include "llvm/Transforms/Scalar/DCE.h" | ||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||
#include <deque> | ||
|
||
using namespace llvm; | ||
|
||
|
@@ -51,29 +53,42 @@ class ProfileAnnotator final { | |
|
||
class BBInfo { | ||
std::optional<uint64_t> Count; | ||
// OutEdges is dimensioned to match the number of terminator operands. | ||
// Entries in the vector match the index in the terminator operand list. In | ||
// some cases - see `shouldExcludeEdge` and its implementation - an entry | ||
// will be nullptr. | ||
// InEdges doesn't have the above constraint. | ||
SmallVector<EdgeInfo *> OutEdges; | ||
SmallVector<EdgeInfo *> InEdges; | ||
size_t UnknownCountOutEdges = 0; | ||
size_t UnknownCountInEdges = 0; | ||
|
||
// Pass AssumeAllKnown when we try to propagate counts from edges to BBs - | ||
// because all the edge counters must be known. | ||
uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges, | ||
bool AssumeAllKnown) const { | ||
uint64_t Sum = 0; | ||
for (const auto *E : Edges) | ||
if (E) | ||
Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U); | ||
// Return std::nullopt if there were no edges to sum. The user can decide | ||
// how to interpret that. | ||
std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges, | ||
bool AssumeAllKnown) const { | ||
std::optional<uint64_t> Sum; | ||
for (const auto *E : Edges) { | ||
// `Edges` may be `OutEdges`, case in which `E` could be nullptr. | ||
if (E) { | ||
if (!Sum.has_value()) | ||
Sum = 0; | ||
*Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U)); | ||
} | ||
} | ||
return Sum; | ||
} | ||
|
||
void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) { | ||
bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) { | ||
assert(!Count.has_value()); | ||
Count = getEdgeSum(Edges, true); | ||
return Count.has_value(); | ||
} | ||
|
||
void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) { | ||
uint64_t KnownSum = getEdgeSum(Edges, false); | ||
uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U); | ||
uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U; | ||
EdgeInfo *E = nullptr; | ||
for (auto *I : Edges) | ||
|
@@ -110,17 +125,15 @@ class ProfileAnnotator final { | |
} | ||
|
||
bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { | ||
if (!succ_empty(&BB) && !UnknownCountOutEdges) { | ||
computeCountFrom(OutEdges); | ||
return true; | ||
if (!UnknownCountOutEdges) { | ||
return computeCountFrom(OutEdges); | ||
} | ||
return false; | ||
} | ||
|
||
bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { | ||
if (!BB.isEntryBlock() && !UnknownCountInEdges) { | ||
computeCountFrom(InEdges); | ||
return true; | ||
if (!UnknownCountInEdges) { | ||
return computeCountFrom(InEdges); | ||
} | ||
return false; | ||
} | ||
|
@@ -178,7 +191,7 @@ class ProfileAnnotator final { | |
bool KeepGoing = true; | ||
while (KeepGoing) { | ||
KeepGoing = false; | ||
for (const auto &BB : reverse(F)) { | ||
for (const auto &BB : F) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We always have the entry BB's count, so it makes more sense to traverse top down. |
||
auto &Info = getBBInfo(BB); | ||
if (!Info.hasCount()) | ||
KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || | ||
|
@@ -198,6 +211,52 @@ class ProfileAnnotator final { | |
|
||
BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } | ||
|
||
const BBInfo &getBBInfo(const BasicBlock &BB) const { | ||
return BBInfos.find(&BB)->second; | ||
} | ||
|
||
// validation function after we propagate the counters: all BBs and edges' | ||
// counters must have a value. | ||
bool allCountersAreAssigned() const { | ||
for (const auto &BBInfo : BBInfos) | ||
if (!BBInfo.second.hasCount()) | ||
return false; | ||
for (const auto &EdgeInfo : EdgeInfos) | ||
if (!EdgeInfo.Count.has_value()) | ||
return false; | ||
return true; | ||
} | ||
|
||
/// Check that all paths from the entry basic block that use edges with | ||
/// non-zero counts arrive at a basic block with no successors (i.e. "exit") | ||
bool allTakenPathsExit() const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a description for the function. Also make it static? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doc done. re. static, same as above - it uses private members of this class. |
||
std::deque<const BasicBlock *> Worklist; | ||
DenseSet<const BasicBlock *> Visited; | ||
Worklist.push_back(&F.getEntryBlock()); | ||
Visited.insert(&F.getEntryBlock()); | ||
while (!Worklist.empty()) { | ||
const auto *BB = Worklist.front(); | ||
Worklist.pop_front(); | ||
if (succ_size(BB) <= 1) | ||
continue; | ||
const auto &BBInfo = getBBInfo(*BB); | ||
bool Inserted = false; | ||
for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) { | ||
const auto *Succ = BB->getTerminator()->getSuccessor(I); | ||
if (!shouldExcludeEdge(*BB, *Succ)) { | ||
if (BBInfo.getEdgeCount(I) > 0) | ||
if (Visited.insert(Succ).second) { | ||
Worklist.push_back(Succ); | ||
Inserted = true; | ||
} | ||
} | ||
} | ||
if (!Inserted) | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
public: | ||
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters, | ||
InstrProfSummaryBuilder &PB) | ||
|
@@ -216,6 +275,9 @@ class ProfileAnnotator final { | |
"profile is managed by IPO transforms"); | ||
(void)Index; | ||
Count = Counters[Ins->getIndex()->getZExtValue()]; | ||
} else if (isa<UnreachableInst>(BB.getTerminator())) { | ||
// The program presumably didn't crash. | ||
Count = 0; | ||
} | ||
auto [It, Ins] = | ||
BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); | ||
|
@@ -268,14 +330,16 @@ class ProfileAnnotator final { | |
PB.addInternalCount(EdgeCount); | ||
} | ||
|
||
if (MaxCount == 0) | ||
F.getContext().emitError( | ||
"[ctx-prof] Encountered a BB with more than one successor, where " | ||
"all outgoing edges have a 0 count. This occurs in non-exiting " | ||
"functions (message pumps, usually) which are not supported in the " | ||
"contextual profiling case"); | ||
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); | ||
if (MaxCount != 0) | ||
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); | ||
} | ||
assert(allCountersAreAssigned() && | ||
"Expected all counters have been assigned."); | ||
assert(allTakenPathsExit() && | ||
"[ctx-prof] Encountered a BB with more than one successor, where " | ||
"all outgoing edges have a 0 count. This occurs in non-exiting " | ||
"functions (message pumps, usually) which are not supported in the " | ||
"contextual profiling case"); | ||
} | ||
}; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
; Check that flattened profile lowering handles cold subgraphs that end in "unreachable" | ||
; RUN: split-file %s %t | ||
; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s | ||
|
||
; CHECK-LABEL: entry: | ||
; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]] | ||
; CHECK-LABEL: no: | ||
; CHECK-NOT: !prof | ||
; CHECK-LABEL: no1: | ||
; CHECK-NOT: !prof | ||
; CHECK-LABEL: no2: | ||
; CHECK-NOT: !prof | ||
; CHECK-LABEL: yes: | ||
; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]] | ||
; CHECK-NOT: !prof | ||
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0} | ||
|
||
;--- example.ll | ||
define void @f1(i32 %cond) !guid !0 { | ||
entry: | ||
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0) | ||
%t = icmp eq i32 %cond, 1 | ||
br i1 %t, label %yes, label %no | ||
|
||
no: | ||
%t2 = icmp eq i32 %cond, 2 | ||
br i1 %t2, label %no1, label %no2 | ||
no1: | ||
unreachable | ||
no2: | ||
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 1) | ||
unreachable | ||
yes: | ||
%t3 = icmp eq i32 %cond, 3 | ||
br i1 %t3, label %yes1, label %yes2 | ||
yes1: | ||
br label %exit | ||
yes2: | ||
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 2) | ||
%t4 = icmp eq i32 %cond, 4 | ||
br i1 %t4, label %yes3, label %yes4 | ||
yes3: | ||
br label %exit | ||
yes4: | ||
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 3) | ||
unreachable | ||
exit: | ||
ret void | ||
} | ||
|
||
!0 = !{i64 1234} | ||
|
||
;--- profile.json | ||
[{"Guid":1234, "Counters":[6,0,0,0]}] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,18 +47,21 @@ | |
; NOPROF-1-NOT: m2_f1() | ||
; NOPROF-2-NOT: m1_f1() | ||
; | ||
; The run with workload definitions - same other options. | ||
; The run with workload definitions - same other options. We do need to re-generate the .bc | ||
; files, to include instrumentation. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How did this work before this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These functions have no edges. Before, the validation had to do with edges, now we insist all counters are defined. |
||
; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m1.ll -o %t/m1-instr.bc | ||
; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m2.ll -o %t/m2-instr.bc | ||
; | ||
; RUN: echo '[ \ | ||
; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \ | ||
; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \ | ||
; RUN: ]' > %t_exp/ctxprof.json | ||
; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream | ||
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \ | ||
; RUN: llvm-lto2 run %t/m1-instr.bc %t/m2-instr.bc \ | ||
; RUN: -o %t_exp/result.o -save-temps \ | ||
; RUN: -use-ctx-profile=%t_exp/ctxprof.bitstream \ | ||
; RUN: -r %t/m1.bc,m1_f1,plx \ | ||
; RUN: -r %t/m2.bc,m2_f1,plx | ||
; RUN: -r %t/m1-instr.bc,m1_f1,plx \ | ||
; RUN: -r %t/m2-instr.bc,m2_f1,plx | ||
; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST | ||
; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND | ||
; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the situation where we have an entry in the Edges vector but it is null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Edges that are removed, e.g. the faux coroutine suspend edge. I'll add a comment on the Edges member to explain the contents.