Skip to content

Commit f733582

Browse files
committed
[SampleFDO] Use Myers diff for stale profile matching
1 parent b932db0 commit f733582

File tree

4 files changed

+349
-65
lines changed

4 files changed

+349
-65
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,53 @@
1919

2020
namespace llvm {
2121

22+
// Callsite location based matching anchor.
23+
struct Anchor {
24+
LineLocation Loc;
25+
FunctionId FuncId;
26+
27+
Anchor(const LineLocation &Loc, const FunctionId &FuncId)
28+
: Loc(Loc), FuncId(FuncId) {}
29+
bool operator==(const Anchor &Other) const {
30+
return this->FuncId == Other.FuncId;
31+
}
32+
};
33+
34+
// This class implements the Myers diff algorithm used for stale profile
35+
// matching. The algorithm provides a simple and efficient way to find the
36+
// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
37+
// sequences. For more details, refer to the paper 'An O(ND) Difference
38+
// Algorithm and Its Variations' by Eugene W. Myers.
39+
// In the scenario of profile fuzzy matching, the two sequences are the IR
40+
// callsite anchors and profile callsite anchors. The subsequence equivalent
41+
// parts from the resulting SES are used to remap the IR locations to the
42+
// profile locations.
43+
class MyersDiff {
44+
public:
45+
struct DiffResult {
46+
LocToLocMap EqualLocations;
47+
// New IR locations that are inserted in the new version.
48+
std::vector<LineLocation> Insertions;
49+
// Old Profile locations that are deleted in the new version.
50+
std::vector<LineLocation> Deletions;
51+
void addEqualLocations(const LineLocation &IRLoc,
52+
const LineLocation &ProfLoc) {
53+
EqualLocations.insert({IRLoc, ProfLoc});
54+
}
55+
void addInsertion(const LineLocation &IRLoc) {
56+
Insertions.push_back(IRLoc);
57+
}
58+
void addDeletion(const LineLocation &ProfLoc) {
59+
Deletions.push_back(ProfLoc);
60+
}
61+
};
62+
63+
// The basic greedy version of Myers's algorithm. Refer to page 6 of the
64+
// original paper.
65+
DiffResult shortestEdit(const std::vector<Anchor> &A,
66+
const std::vector<Anchor> &B) const;
67+
};
68+
2269
// Sample profile matching - fuzzy match.
2370
class SampleProfileMatcher {
2471
Module &M;
@@ -27,8 +74,8 @@ class SampleProfileMatcher {
2774
const ThinOrFullLTOPhase LTOPhase;
2875
SampleProfileMap FlattenedProfiles;
2976
// For each function, the matcher generates a map, of which each entry is a
30-
// mapping from the source location of current build to the source location in
31-
// the profile.
77+
// mapping from the source location of current build to the source location
78+
// in the profile.
3279
StringMap<LocToLocMap> FuncMappings;
3380

3481
// Match state for an anchor/callsite.
@@ -143,6 +190,10 @@ class SampleProfileMatcher {
143190
}
144191
void distributeIRToProfileLocationMap();
145192
void distributeIRToProfileLocationMap(FunctionSamples &FS);
193+
void matchNonAnchorAndWriteResults(
194+
const LocToLocMap &AnchorMatchings,
195+
const std::map<LineLocation, StringRef> &IRAnchors,
196+
LocToLocMap &IRToProfileLocationMap);
146197
void runStaleProfileMatching(
147198
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
148199
const std::map<LineLocation, std::unordered_set<FunctionId>>

llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp

Lines changed: 160 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -122,15 +122,149 @@ void SampleProfileMatcher::findProfileAnchors(
122122
}
123123
}
124124

125+
// Computes a shortest edit script between the anchor sequences A and B using
// the greedy Myers algorithm, then walks the recorded search history backwards
// to classify every location as equal, inserted (only in A) or deleted (only
// in B). Two empty sequences yield an empty result.
MyersDiff::DiffResult
MyersDiff::shortestEdit(const std::vector<Anchor> &A,
                        const std::vector<Anchor> &B) const {
  const int32_t SizeA = A.size();
  const int32_t SizeB = B.size();
  const int32_t MaxEdits = SizeA + SizeB;

  DiffResult Result;
  if (MaxEdits == 0)
    return Result;

  // Diagonals K = X - Y lie in [-MaxEdits, MaxEdits]; shift them so they can
  // be used as vector indices.
  auto Slot = [MaxEdits](int32_t Diagonal) { return Diagonal + MaxEdits; };

  // Decides which neighbouring diagonal the path reaching diagonal K with D
  // edits is extended from: true means K + 1 (X is kept), false means K - 1
  // (X advances by one). The outermost diagonals have a single neighbour.
  auto ExtendsFromUpper = [&Slot](const std::vector<int32_t> &Furthest,
                                  int32_t K, int32_t D) {
    if (K == -D)
      return true;
    if (K == D)
      return false;
    return Furthest[Slot(K - 1)] < Furthest[Slot(K + 1)];
  };

  // Replays the search from the end point (SizeA, SizeB) back to the origin.
  // History[D] is the furthest-reaching table as it was before round D ran.
  auto Reconstruct = [&](const std::vector<std::vector<int32_t>> &History) {
    int32_t X = SizeA;
    int32_t Y = SizeB;
    int32_t D = History.size() - 1;
    while (X > 0 || Y > 0) {
      const std::vector<int32_t> &Prev = History[D];
      const int32_t K = X - Y;
      const int32_t PrevK = ExtendsFromUpper(Prev, K, D) ? K + 1 : K - 1;
      const int32_t PrevX = Prev[Slot(PrevK)];
      const int32_t PrevY = PrevX - PrevK;

      // Undo the diagonal run: these elements are common to both sequences.
      while (X > PrevX && Y > PrevY) {
        --X;
        --Y;
        Result.addEqualLocations(A[X].Loc, B[Y].Loc);
      }

      // Round 0 consists of a diagonal run only, there is no edit to undo.
      if (D == 0)
        break;

      // Undo the single edit that led onto this diagonal.
      if (Y == PrevY)
        Result.addInsertion(A[X - 1].Loc);
      else if (X == PrevX)
        Result.addDeletion(B[Y - 1].Loc);

      X = PrevX;
      Y = PrevY;
      --D;
    }
  };

  // The greedy LCS/SES algorithm. Furthest[Slot(K)] is the largest X reached
  // on diagonal K so far.
  std::vector<int32_t> Furthest(2 * MaxEdits + 1, -1);
  Furthest[Slot(1)] = 0;
  std::vector<std::vector<int32_t>> History;
  for (int32_t D = 0; D <= MaxEdits; ++D) {
    // Snapshot the table before this round; the backtrack needs it.
    History.push_back(Furthest);
    for (int32_t K = -D; K <= D; K += 2) {
      int32_t X = ExtendsFromUpper(Furthest, K, D) ? Furthest[Slot(K + 1)]
                                                   : Furthest[Slot(K - 1)] + 1;
      int32_t Y = X - K;
      // Follow the diagonal for as long as the anchors compare equal.
      while (X < SizeA && Y < SizeB && A[X] == B[Y]) {
        ++X;
        ++Y;
      }

      Furthest[Slot(K)] = X;

      if (X >= SizeA && Y >= SizeB) {
        // Length of an SES is D.
        Reconstruct(History);
        return Result;
      }
    }
  }
  // Length of an SES is greater than Max.
  return Result;
}
200+
201+
void SampleProfileMatcher::matchNonAnchorAndWriteResults(
202+
const LocToLocMap &AnchorMatchings,
203+
const std::map<LineLocation, StringRef> &IRAnchors,
204+
LocToLocMap &IRToProfileLocationMap) {
205+
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
206+
// Skip the unchanged location mapping to save memory.
207+
if (From != To)
208+
IRToProfileLocationMap.insert({From, To});
209+
};
210+
211+
// Use function's beginning location as the initial anchor.
212+
int32_t LocationDelta = 0;
213+
SmallVector<LineLocation> LastMatchedNonAnchors;
214+
for (const auto &IR : IRAnchors) {
215+
const auto &Loc = IR.first;
216+
StringRef CalleeName = IR.second;
217+
bool IsMatchedAnchor = false;
218+
219+
// Match the anchor location in lexical order.
220+
auto R = AnchorMatchings.find(Loc);
221+
if (R != AnchorMatchings.end()) {
222+
const auto &Candidate = R->second;
223+
InsertMatching(Loc, Candidate);
224+
LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
225+
<< " is matched from " << Loc << " to " << Candidate
226+
<< "\n");
227+
LocationDelta = Candidate.LineOffset - Loc.LineOffset;
228+
229+
// Match backwards for non-anchor locations.
230+
// The locations in LastMatchedNonAnchors have been matched forwards
231+
// based on the previous anchor, spilt it evenly and overwrite the
232+
// second half based on the current anchor.
233+
for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
234+
I < LastMatchedNonAnchors.size(); I++) {
235+
const auto &L = LastMatchedNonAnchors[I];
236+
uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
237+
LineLocation Candidate(CandidateLineOffset, L.Discriminator);
238+
InsertMatching(L, Candidate);
239+
LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
240+
<< " to " << Candidate << "\n");
241+
}
242+
243+
IsMatchedAnchor = true;
244+
LastMatchedNonAnchors.clear();
245+
}
246+
247+
// Match forwards for non-anchor locations.
248+
if (!IsMatchedAnchor) {
249+
uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
250+
LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
251+
InsertMatching(Loc, Candidate);
252+
LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
253+
<< Candidate << "\n");
254+
LastMatchedNonAnchors.emplace_back(Loc);
255+
}
256+
}
257+
}
258+
125259
// Call target name anchor based profile fuzzy matching.
126260
// Input:
127261
// For IR locations, the anchor is the callee name of direct callsite; For
128262
// profile locations, it's the call target name for BodySamples or inlinee's
129263
// profile name for CallsiteSamples.
130264
// Matching heuristic:
131-
// First match all the anchors in lexical order, then split the non-anchor
132-
// locations between the two anchors evenly, first half are matched based on the
133-
// start anchor, second half are matched based on the end anchor.
265+
// First match all the anchors using the diff algorithm, then split the
266+
// non-anchor locations between the two anchors evenly, first half are matched
267+
// based on the start anchor, second half are matched based on the end anchor.
134268
// For example, given:
135269
// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
136270
// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
@@ -149,77 +283,40 @@ void SampleProfileMatcher::runStaleProfileMatching(
149283
assert(IRToProfileLocationMap.empty() &&
150284
"Run stale profile matching only once per function");
151285

152-
std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
286+
std::vector<Anchor> ProfileCallsiteAnchors;
153287
for (const auto &I : ProfileAnchors) {
154288
const auto &Loc = I.first;
155289
const auto &Callees = I.second;
156290
// Filter out possible indirect calls, use direct callee name as anchor.
157291
if (Callees.size() == 1) {
158-
FunctionId CalleeName = *Callees.begin();
159-
const auto &Candidates = CalleeToCallsitesMap.try_emplace(
160-
CalleeName, std::set<LineLocation>());
161-
Candidates.first->second.insert(Loc);
292+
auto CalleeName = *Callees.begin();
293+
ProfileCallsiteAnchors.emplace_back(Loc, CalleeName);
294+
} else if (Callees.size() > 1) {
295+
ProfileCallsiteAnchors.emplace_back(Loc,
296+
FunctionId(UnknownIndirectCallee));
162297
}
163298
}
164299

165-
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
166-
// Skip the unchanged location mapping to save memory.
167-
if (From != To)
168-
IRToProfileLocationMap.insert({From, To});
169-
};
170-
171-
// Use function's beginning location as the initial anchor.
172-
int32_t LocationDelta = 0;
173-
SmallVector<LineLocation> LastMatchedNonAnchors;
300+
std::vector<Anchor> IRCallsiteAnchors;
301+
for (const auto &I : IRAnchors) {
302+
const auto &Loc = I.first;
303+
const auto &CalleeName = I.second;
304+
if (CalleeName.empty())
305+
continue;
306+
IRCallsiteAnchors.emplace_back(Loc, FunctionId(CalleeName));
307+
}
174308

175-
for (const auto &IR : IRAnchors) {
176-
const auto &Loc = IR.first;
177-
auto CalleeName = IR.second;
178-
bool IsMatchedAnchor = false;
179-
// Match the anchor location in lexical order.
180-
if (!CalleeName.empty()) {
181-
auto CandidateAnchors =
182-
CalleeToCallsitesMap.find(getRepInFormat(CalleeName));
183-
if (CandidateAnchors != CalleeToCallsitesMap.end() &&
184-
!CandidateAnchors->second.empty()) {
185-
auto CI = CandidateAnchors->second.begin();
186-
const auto Candidate = *CI;
187-
CandidateAnchors->second.erase(CI);
188-
InsertMatching(Loc, Candidate);
189-
LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
190-
<< " is matched from " << Loc << " to " << Candidate
191-
<< "\n");
192-
LocationDelta = Candidate.LineOffset - Loc.LineOffset;
193-
194-
// Match backwards for non-anchor locations.
195-
// The locations in LastMatchedNonAnchors have been matched forwards
196-
// based on the previous anchor, spilt it evenly and overwrite the
197-
// second half based on the current anchor.
198-
for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
199-
I < LastMatchedNonAnchors.size(); I++) {
200-
const auto &L = LastMatchedNonAnchors[I];
201-
uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
202-
LineLocation Candidate(CandidateLineOffset, L.Discriminator);
203-
InsertMatching(L, Candidate);
204-
LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
205-
<< " to " << Candidate << "\n");
206-
}
309+
if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
310+
return;
207311

208-
IsMatchedAnchor = true;
209-
LastMatchedNonAnchors.clear();
210-
}
211-
}
312+
// Use the diff algorithm to find the SES, the resulting equal locations from
313+
// IR to Profile are used as anchor to match other locations. Note that here
314+
// use IR anchor as base(A) to align with the order of IRToProfileLocationMap.
315+
MyersDiff Diff;
316+
auto DiffRes = Diff.shortestEdit(IRCallsiteAnchors, ProfileCallsiteAnchors);
212317

213-
// Match forwards for non-anchor locations.
214-
if (!IsMatchedAnchor) {
215-
uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
216-
LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
217-
InsertMatching(Loc, Candidate);
218-
LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
219-
<< Candidate << "\n");
220-
LastMatchedNonAnchors.emplace_back(Loc);
221-
}
222-
}
318+
matchNonAnchorAndWriteResults(DiffRes.EqualLocations, IRAnchors,
319+
IRToProfileLocationMap);
223320
}
224321

225322
void SampleProfileMatcher::runOnFunction(Function &F) {

llvm/unittests/Transforms/IPO/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
33
AsmParser
44
Core
55
IPO
6+
ProfileData
67
Support
78
TargetParser
89
TransformUtils
@@ -13,6 +14,7 @@ add_llvm_unittest(IPOTests
1314
WholeProgramDevirt.cpp
1415
AttributorTest.cpp
1516
FunctionSpecializationTest.cpp
17+
SampleProfileMatcherTests.cpp
1618
)
1719

1820
set_property(TARGET IPOTests PROPERTY FOLDER "Tests/UnitTests/TransformsTests")

0 commit comments

Comments
 (0)