-
Notifications
You must be signed in to change notification settings - Fork 14.3k
BalancedPartitioning: minor updates #77568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-pgo Author: Fangrui Song (MaskRay) ChangesWhen LargestTraceSize is a power of two, createBPFunctionNodes does not BalancedPartitioning::bisect can use unstable sort because BalancedPartitioning::runIterations: use one DenseMap and simplify the Full diff: https://github.com/llvm/llvm-project/pull/77568.diff 3 Files Affected:
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 4264da8ad75143..6016fcb3551d11 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -13,6 +13,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -921,7 +922,7 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
if (Timestamp < Trace.FunctionNameRefs.size())
FunctionIds.insert(Trace.FunctionNameRefs[Timestamp]);
- int N = std::ceil(std::log2(LargestTraceSize));
+ const int N = Log2_64(LargestTraceSize) + 1;
// TODO: We need to use the Trace.Weight field to give more weight to more
// important utilities
@@ -929,8 +930,8 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
for (size_t TraceIdx = 0; TraceIdx < Traces.size(); TraceIdx++) {
auto &Trace = Traces[TraceIdx].FunctionNameRefs;
for (size_t Timestamp = 0; Timestamp < Trace.size(); Timestamp++) {
- for (int I = std::floor(std::log2(Timestamp + 1)); I < N; I++) {
- auto &FunctionId = Trace[Timestamp];
+ for (int I = Log2_64(Timestamp + 1); I < N; I++) {
+ auto FunctionId = Trace[Timestamp];
UtilityNodeT GroupId = TraceIdx * N + I;
FuncGroups[FunctionId].push_back(GroupId);
}
@@ -938,7 +939,7 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
}
std::vector<BPFunctionNode> Nodes;
- for (auto &Id : FunctionIds) {
+ for (auto Id : FunctionIds) {
auto &UNs = FuncGroups[Id];
llvm::sort(UNs);
UNs.erase(std::unique(UNs.begin(), UNs.end()), UNs.end());
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
index 5843be94991151..cb6ba61179941f 100644
--- a/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -117,7 +117,7 @@ void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
// We've reach the lowest level of the recursion tree. Fall back to the
// original order and assign to buckets.
- llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
+ llvm::sort(Nodes, [](const auto &L, const auto &R) {
return L.InputOrderIndex < R.InputOrderIndex;
});
for (auto &N : Nodes)
@@ -167,26 +167,22 @@ void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
unsigned RightBucket,
std::mt19937 &RNG) const {
unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
- DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
+ DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
for (auto &N : Nodes)
for (auto &UN : N.UtilityNodes)
- ++UtilityNodeDegree[UN];
+ ++UtilityNodeIndex[UN];
// Remove utility nodes if they have just one edge or are connected to all
// functions
for (auto &N : Nodes)
llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
- return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
+ return UtilityNodeIndex[UN] == 1 || UtilityNodeIndex[UN] == NumNodes;
});
// Renumber utility nodes so they can be used to index into Signatures
- DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
- for (auto &N : Nodes)
- for (auto &UN : N.UtilityNodes)
- if (!UtilityNodeIndex.count(UN))
- UtilityNodeIndex[UN] = UtilityNodeIndex.size();
+ UtilityNodeIndex.clear();
for (auto &N : Nodes)
for (auto &UN : N.UtilityNodes)
- UN = UtilityNodeIndex[UN];
+ UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second;
// Initialize signatures
SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
diff --git a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
index 97ff5c1edf5efc..6af6f1bcdc40a8 100644
--- a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
+++ b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
@@ -24,11 +24,6 @@ void PrintTo(const BPFunctionNode &Node, std::ostream *OS) {
}
TEST(BPFunctionNodeTest, Basic) {
- auto Nodes = TemporalProfTraceTy::createBPFunctionNodes({
- TemporalProfTraceTy({0, 1, 2, 3, 4}),
- TemporalProfTraceTy({4, 2}),
- });
-
auto NodeIs = [](BPFunctionNode::IDT Id,
ArrayRef<BPFunctionNode::UtilityNodeT> UNs) {
return AllOf(Field("Id", &BPFunctionNode::Id, Id),
@@ -36,6 +31,18 @@ TEST(BPFunctionNodeTest, Basic) {
UnorderedElementsAreArray(UNs)));
};
+ auto Nodes = TemporalProfTraceTy::createBPFunctionNodes({
+ TemporalProfTraceTy({0, 1, 2, 3}),
+ });
+ EXPECT_THAT(Nodes,
+ UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}),
+ NodeIs(2, {1, 2}), NodeIs(3, {2})));
+
+ Nodes = TemporalProfTraceTy::createBPFunctionNodes({
+ TemporalProfTraceTy({0, 1, 2, 3, 4}),
+ TemporalProfTraceTy({4, 2}),
+ });
+
EXPECT_THAT(Nodes,
UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}),
NodeIs(2, {1, 2, 4, 5}), NodeIs(3, {2}),
|
@llvm/pr-subscribers-llvm-support Author: Fangrui Song (MaskRay) ChangesWhen LargestTraceSize is a power of two, createBPFunctionNodes does not BalancedPartitioning::bisect can use unstable sort because BalancedPartitioning::runIterations: use one DenseMap and simplify the Full diff: https://github.com/llvm/llvm-project/pull/77568.diff 3 Files Affected:
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 4264da8ad75143..6016fcb3551d11 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -13,6 +13,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -921,7 +922,7 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
if (Timestamp < Trace.FunctionNameRefs.size())
FunctionIds.insert(Trace.FunctionNameRefs[Timestamp]);
- int N = std::ceil(std::log2(LargestTraceSize));
+ const int N = Log2_64(LargestTraceSize) + 1;
// TODO: We need to use the Trace.Weight field to give more weight to more
// important utilities
@@ -929,8 +930,8 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
for (size_t TraceIdx = 0; TraceIdx < Traces.size(); TraceIdx++) {
auto &Trace = Traces[TraceIdx].FunctionNameRefs;
for (size_t Timestamp = 0; Timestamp < Trace.size(); Timestamp++) {
- for (int I = std::floor(std::log2(Timestamp + 1)); I < N; I++) {
- auto &FunctionId = Trace[Timestamp];
+ for (int I = Log2_64(Timestamp + 1); I < N; I++) {
+ auto FunctionId = Trace[Timestamp];
UtilityNodeT GroupId = TraceIdx * N + I;
FuncGroups[FunctionId].push_back(GroupId);
}
@@ -938,7 +939,7 @@ std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
}
std::vector<BPFunctionNode> Nodes;
- for (auto &Id : FunctionIds) {
+ for (auto Id : FunctionIds) {
auto &UNs = FuncGroups[Id];
llvm::sort(UNs);
UNs.erase(std::unique(UNs.begin(), UNs.end()), UNs.end());
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
index 5843be94991151..cb6ba61179941f 100644
--- a/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -117,7 +117,7 @@ void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
// We've reach the lowest level of the recursion tree. Fall back to the
// original order and assign to buckets.
- llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
+ llvm::sort(Nodes, [](const auto &L, const auto &R) {
return L.InputOrderIndex < R.InputOrderIndex;
});
for (auto &N : Nodes)
@@ -167,26 +167,22 @@ void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
unsigned RightBucket,
std::mt19937 &RNG) const {
unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
- DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
+ DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
for (auto &N : Nodes)
for (auto &UN : N.UtilityNodes)
- ++UtilityNodeDegree[UN];
+ ++UtilityNodeIndex[UN];
// Remove utility nodes if they have just one edge or are connected to all
// functions
for (auto &N : Nodes)
llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
- return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
+ return UtilityNodeIndex[UN] == 1 || UtilityNodeIndex[UN] == NumNodes;
});
// Renumber utility nodes so they can be used to index into Signatures
- DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
- for (auto &N : Nodes)
- for (auto &UN : N.UtilityNodes)
- if (!UtilityNodeIndex.count(UN))
- UtilityNodeIndex[UN] = UtilityNodeIndex.size();
+ UtilityNodeIndex.clear();
for (auto &N : Nodes)
for (auto &UN : N.UtilityNodes)
- UN = UtilityNodeIndex[UN];
+ UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second;
// Initialize signatures
SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
diff --git a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
index 97ff5c1edf5efc..6af6f1bcdc40a8 100644
--- a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
+++ b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp
@@ -24,11 +24,6 @@ void PrintTo(const BPFunctionNode &Node, std::ostream *OS) {
}
TEST(BPFunctionNodeTest, Basic) {
- auto Nodes = TemporalProfTraceTy::createBPFunctionNodes({
- TemporalProfTraceTy({0, 1, 2, 3, 4}),
- TemporalProfTraceTy({4, 2}),
- });
-
auto NodeIs = [](BPFunctionNode::IDT Id,
ArrayRef<BPFunctionNode::UtilityNodeT> UNs) {
return AllOf(Field("Id", &BPFunctionNode::Id, Id),
@@ -36,6 +31,18 @@ TEST(BPFunctionNodeTest, Basic) {
UnorderedElementsAreArray(UNs)));
};
+ auto Nodes = TemporalProfTraceTy::createBPFunctionNodes({
+ TemporalProfTraceTy({0, 1, 2, 3}),
+ });
+ EXPECT_THAT(Nodes,
+ UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}),
+ NodeIs(2, {1, 2}), NodeIs(3, {2})));
+
+ Nodes = TemporalProfTraceTy::createBPFunctionNodes({
+ TemporalProfTraceTy({0, 1, 2, 3, 4}),
+ TemporalProfTraceTy({4, 2}),
+ });
+
EXPECT_THAT(Nodes,
UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}),
NodeIs(2, {1, 2, 4, 5}), NodeIs(3, {2}),
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Thanks for adding another test
When LargestTraceSize is a power of two, createBPFunctionNodes does not allocate a group ID for Trace[LargestTraceSize-1] (as N is off by 1). Fix this and change floor+log2 to Log2_64. BalancedPartitioning::bisect can use unstable sort because `Nodes` contains distinct `InputOrderIndex`s. BalancedPartitioning::runIterations: use one DenseMap and simplify the node renumbering code.
56a21a6
to
517b26f
Compare
When LargestTraceSize is a power of two, createBPFunctionNodes does not allocate a group ID for Trace[LargestTraceSize-1] (as N is off by 1). Fix this and change floor+log2 to Log2_64. BalancedPartitioning::bisect can use unstable sort because `Nodes` contains distinct `InputOrderIndex`s. BalancedPartitioning::runIterations: use one DenseMap and simplify the node renumbering code.
When LargestTraceSize is a power of two, createBPFunctionNodes does not
allocate a group ID for Trace[LargestTraceSize-1] (as N is off by 1). Fix
this and change floor+log2 to Log2_64.
BalancedPartitioning::bisect can use unstable sort because
Nodes
contains distinct
InputOrderIndex
s.BalancedPartitioning::runIterations: use one DenseMap and simplify the
node renumbering code.