Skip to content

Commit 3a5791e

Browse files
[SampleProf] Templatize longestCommonSequence (NFC) (#114633)
This patch moves the implementation of longestCommonSequence to a new header file. I'm planning to implement a profile undrifting algorithm for MemProf so that the compiler can ingest a somewhat stale MemProf profile and still deliver most of the benefits that would be delivered if the profile were completely up to date (with no line number or column number differences). Since the core undrifting algorithm is the same between MemProf and AutoFDO, this patch turns longestCommonSequence into a template. The original longestCommonSequence implementation is repurposed and now serves as a wrapper around a template specialization. Note that the usage differences between MemProf and AutoFDO are minor. For example, I'm planning to use a line-column number pair instead of LineLocation, which uses a discriminator. To identify a function, I'm planning to use a uint64_t GUID instead of FunctionId. For now, I'm returning matches via a function object InsertMatching because it's impossible to infer the map type from LineLocation alone. Specifically: std::unordered_map<LineLocation, LineLocation> does not work because we cannot infer the hash functor LineLocationHash. I could define std::hash<LineLocation>. Alternatively, in the future, I might switch to DenseMap and define DenseMapInfo<LineLocation>. This way, DenseMap<LineLocation, LineLocation> automatically picks up DenseMapInfo<LineLocation>.
1 parent 176d653 commit 3a5791e

File tree

3 files changed

+130
-86
lines changed

3 files changed

+130
-86
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -205,16 +205,6 @@ class SampleProfileMatcher {
205205
}
206206
void distributeIRToProfileLocationMap();
207207
void distributeIRToProfileLocationMap(FunctionSamples &FS);
208-
// This function implements the Myers diff algorithm used for stale profile
209-
// matching. The algorithm provides a simple and efficient way to find the
210-
// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
211-
// sequences. For more details, refer to the paper 'An O(ND) Difference
212-
// Algorithm and Its Variations' by Eugene W. Myers.
213-
// In the scenario of profile fuzzy matching, the two sequences are the IR
214-
// callsite anchors and profile callsite anchors. The subsequence equivalent
215-
// parts from the resulting SES are used to remap the IR locations to the
216-
// profile locations. As the number of function callsites is usually not big,
217-
// we currently just implement the basic greedy version (page 6 of the paper).
218208
LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors,
219209
const AnchorList &ProfileCallsiteAnchors,
220210
bool MatchUnusedFunction);
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===- LongestCommonSequence.h - Compute LCS --------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements longestCommonSequence, useful for finding matches
10+
// between two sequences, such as lists of profiling points.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H
15+
#define LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H
16+
17+
#include "llvm/ADT/ArrayRef.h"
18+
19+
#include <cstdint>
20+
#include <vector>
21+
22+
namespace llvm {
23+
24+
// This function implements the Myers diff algorithm used for stale profile
25+
// matching. The algorithm provides a simple and efficient way to find the
26+
// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
27+
// sequences. For more details, refer to the paper 'An O(ND) Difference
28+
// Algorithm and Its Variations' by Eugene W. Myers.
29+
// In the scenario of profile fuzzy matching, the two sequences are the IR
30+
// callsite anchors and profile callsite anchors. The subsequence equivalent
31+
// parts from the resulting SES are used to remap the IR locations to the
32+
// profile locations. As the number of function callsites is usually not big,
33+
// we currently just implement the basic greedy version (page 6 of the paper).
34+
// Aligns AnchorList1 with AnchorList2 using the greedy Myers search and
// reports every matched pair of locations through InsertMatching.
//
// Template parameters:
//   Loc        - type of the `.first` member of each anchor; handed to
//                InsertMatching by value.
//   Function   - type of the `.second` member of each anchor; only ever
//                passed to FunctionMatchesProfile.
//   AnchorList - sequence of (Loc, Function) pairs; size() and operator[] are
//                used on it. Defaults to ArrayRef<std::pair<Loc, Function>>.
//
// Parameters:
//   AnchorList1, AnchorList2 - the two sequences to align.
//   FunctionMatchesProfile   - called as (AnchorList1[X].second,
//                              AnchorList2[Y].second); returning true makes
//                              the two anchors count as equal.
//   InsertMatching           - called as (AnchorList1[X].first,
//                              AnchorList2[Y].first) for each matched pair.
//                              Pairs are reported while walking backwards
//                              from the ends of both lists.
//
// Returns nothing. InsertMatching is never called when both lists are empty.
template <typename Loc, typename Function,
35+
typename AnchorList = ArrayRef<std::pair<Loc, Function>>>
36+
void longestCommonSequence(
37+
AnchorList AnchorList1, AnchorList AnchorList2,
38+
llvm::function_ref<bool(const Function &, const Function &)>
39+
FunctionMatchesProfile,
40+
llvm::function_ref<void(Loc, Loc)> InsertMatching) {
41+
// The sizes are stored as int32_t. MaxDepth bounds both the search depth and
// the range of diagonals visited below.
int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
42+
MaxDepth = Size1 + Size2;
43+
// Diagonals K run over [-MaxDepth, MaxDepth]; shift them by MaxDepth so they
// can index a vector of 2 * MaxDepth + 1 entries.
auto Index = [&](int32_t I) { return I + MaxDepth; };
44+
45+
// Both lists are empty: there is nothing to match.
if (MaxDepth == 0)
46+
return;
47+
48+
// Backtrack the SES result.
// Replays the recorded search from (Size1, Size2) back towards the start.
// Trace[Depth] is the V array as it was before depth `Depth` was processed.
// NOTE(review): the AnchorList1/AnchorList2 parameters shadow the enclosing
// function's parameters of the same name; the only call passes those same
// objects.
49+
auto Backtrack = [&](ArrayRef<std::vector<int32_t>> Trace,
50+
AnchorList AnchorList1, AnchorList AnchorList2) {
51+
int32_t X = Size1, Y = Size2;
52+
for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
53+
const auto &P = Trace[Depth];
54+
// Diagonal that the current point (X, Y) lies on.
int32_t K = X - Y;
55+
int32_t PrevK = K;
56+
// Same test as in the forward search, evaluated on the snapshot, to find
// which neighbouring diagonal this depth was reached from.
if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
57+
PrevK = K + 1;
58+
else
59+
PrevK = K - 1;
60+
61+
// End point recorded for diagonal PrevK before this depth was processed.
int32_t PrevX = P[Index(PrevK)];
62+
int32_t PrevY = PrevX - PrevK;
63+
// Walk back along the diagonal; every step undone here was a match made by
// the forward search, so report the pair.
while (X > PrevX && Y > PrevY) {
64+
X--;
65+
Y--;
66+
InsertMatching(AnchorList1[X].first, AnchorList2[Y].first);
67+
}
68+
69+
// Depth 0 has no preceding edit step, so the walk is complete.
if (Depth == 0)
70+
break;
71+
72+
// NOTE(review): X and Y are unconditionally overwritten just below, so
// these decrements have no lasting effect.
if (Y == PrevY)
73+
X--;
74+
else if (X == PrevX)
75+
Y--;
76+
X = PrevX;
77+
Y = PrevY;
78+
}
79+
};
80+
81+
// The greedy LCS/SES algorithm.
82+
83+
// An array contains the endpoints of the furthest reaching D-paths.
// V[Index(K)] holds the X coordinate for diagonal K; -1 marks an entry that
// has not been written yet.
84+
std::vector<int32_t> V(2 * MaxDepth + 1, -1);
85+
// Seed diagonal 1 so that the first iteration (Depth 0, K 0) starts at X = 0.
V[Index(1)] = 0;
86+
// Trace is used to backtrack the SES result.
87+
std::vector<std::vector<int32_t>> Trace;
88+
for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
89+
// Snapshot V as it stands before this depth is processed, for Backtrack.
Trace.push_back(V);
90+
// Visit diagonals -Depth, -Depth + 2, ..., Depth.
for (int32_t K = -Depth; K <= Depth; K += 2) {
91+
int32_t X = 0, Y = 0;
92+
// Pick the predecessor: coming from diagonal K + 1 keeps X (so Y advances
// by one); coming from diagonal K - 1 advances X by one. On the boundary
// diagonals only one of the two is considered.
if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
93+
X = V[Index(K + 1)];
94+
else
95+
X = V[Index(K - 1)] + 1;
96+
Y = X - K;
97+
// Follow the diagonal for as long as consecutive anchors match.
while (
98+
X < Size1 && Y < Size2 &&
99+
FunctionMatchesProfile(AnchorList1[X].second, AnchorList2[Y].second))
100+
X++, Y++;
101+
102+
// Remember the furthest X reached on diagonal K.
V[Index(K)] = X;
103+
104+
// Both lists are fully consumed: the search is done.
if (X >= Size1 && Y >= Size2) {
105+
// Length of an SES is D.
106+
Backtrack(Trace, AnchorList1, AnchorList2);
107+
return;
108+
}
109+
}
110+
}
111+
// Length of an SES is greater than MaxDepth.
// NOTE(review): this point looks unreachable, presumably because the loop
// above always returns by Depth == MaxDepth; confirm.
112+
}
113+
114+
} // end namespace llvm
115+
116+
#endif // LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H

llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp

Lines changed: 14 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/IR/IntrinsicInst.h"
1616
#include "llvm/IR/MDBuilder.h"
1717
#include "llvm/Support/CommandLine.h"
18+
#include "llvm/Transforms/Utils/LongestCommonSequence.h"
1819

1920
using namespace llvm;
2021
using namespace sampleprof;
@@ -194,82 +195,19 @@ LocToLocMap
194195
SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
195196
const AnchorList &AnchorList2,
196197
bool MatchUnusedFunction) {
197-
int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
198-
MaxDepth = Size1 + Size2;
199-
auto Index = [&](int32_t I) { return I + MaxDepth; };
200-
201-
LocToLocMap EqualLocations;
202-
if (MaxDepth == 0)
203-
return EqualLocations;
204-
205-
// Backtrack the SES result.
206-
auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
207-
const AnchorList &AnchorList1,
208-
const AnchorList &AnchorList2,
209-
LocToLocMap &EqualLocations) {
210-
int32_t X = Size1, Y = Size2;
211-
for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
212-
const auto &P = Trace[Depth];
213-
int32_t K = X - Y;
214-
int32_t PrevK = K;
215-
if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
216-
PrevK = K + 1;
217-
else
218-
PrevK = K - 1;
219-
220-
int32_t PrevX = P[Index(PrevK)];
221-
int32_t PrevY = PrevX - PrevK;
222-
while (X > PrevX && Y > PrevY) {
223-
X--;
224-
Y--;
225-
EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
226-
}
227-
228-
if (Depth == 0)
229-
break;
230-
231-
if (Y == PrevY)
232-
X--;
233-
else if (X == PrevX)
234-
Y--;
235-
X = PrevX;
236-
Y = PrevY;
237-
}
238-
};
239-
240-
// The greedy LCS/SES algorithm.
241-
242-
// An array contains the endpoints of the furthest reaching D-paths.
243-
std::vector<int32_t> V(2 * MaxDepth + 1, -1);
244-
V[Index(1)] = 0;
245-
// Trace is used to backtrack the SES result.
246-
std::vector<std::vector<int32_t>> Trace;
247-
for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
248-
Trace.push_back(V);
249-
for (int32_t K = -Depth; K <= Depth; K += 2) {
250-
int32_t X = 0, Y = 0;
251-
if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
252-
X = V[Index(K + 1)];
253-
else
254-
X = V[Index(K - 1)] + 1;
255-
Y = X - K;
256-
while (X < Size1 && Y < Size2 &&
257-
functionMatchesProfile(
258-
AnchorList1[X].second, AnchorList2[Y].second,
259-
!MatchUnusedFunction /* Find matched function only */))
260-
X++, Y++;
261-
262-
V[Index(K)] = X;
263-
264-
if (X >= Size1 && Y >= Size2) {
265-
// Length of an SES is D.
266-
Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
267-
return EqualLocations;
268-
}
269-
}
270-
}
271-
// Length of an SES is greater than MaxDepth.
272-
return EqualLocations;
198+
// Result map: each matched location from AnchorList1 is mapped to its
// counterpart from AnchorList2.
LocToLocMap MatchedAnchors;
199+
// Delegate to the generic llvm::longestCommonSequence template, instantiated
// with LineLocation as the location type and FunctionId as the function type.
llvm::longestCommonSequence<LineLocation, FunctionId>(
200+
AnchorList1, AnchorList2,
201+
// Equality predicate: forwards to functionMatchesProfile, passing the
// negation of MatchUnusedFunction as its third argument.
[&](const FunctionId &A, const FunctionId &B) {
202+
return functionMatchesProfile(
203+
A, B,
204+
!MatchUnusedFunction // Find matched function only
205+
);
206+
},
207+
// Match sink: record the pair. try_emplace is expected to leave an entry
// already present for A untouched.
[&](LineLocation A, LineLocation B) {
208+
MatchedAnchors.try_emplace(A, B);
209+
});
210+
return MatchedAnchors;
273211
}
274212

275213
void SampleProfileMatcher::matchNonCallsiteLocs(

0 commit comments

Comments
 (0)