|
| 1 | +//===- LongestCommonSequence.h - Compute LCS --------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This file implements longestCommonSequence, useful for finding matches |
| 10 | +// between two sequences, such as lists of profiling points. |
| 11 | +// |
| 12 | +//===----------------------------------------------------------------------===// |
| 13 | + |
| 14 | +#ifndef LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H |
| 15 | +#define LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H |
| 16 | + |
| 17 | +#include "llvm/ADT/ArrayRef.h" |
| 18 | + |
| 19 | +#include <cstdint> |
| 20 | +#include <vector> |
| 21 | + |
| 22 | +namespace llvm { |
| 23 | + |
| 24 | +// This function implements the Myers diff algorithm used for stale profile |
| 25 | +// matching. The algorithm provides a simple and efficient way to find the |
| 26 | +// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two |
| 27 | +// sequences. For more details, refer to the paper 'An O(ND) Difference |
| 28 | +// Algorithm and Its Variations' by Eugene W. Myers. |
| 29 | +// In the scenario of profile fuzzy matching, the two sequences are the IR |
| 30 | +// callsite anchors and profile callsite anchors. The subsequence equivalent |
| 31 | +// parts from the resulting SES are used to remap the IR locations to the |
| 32 | +// profile locations. As the number of function callsite is usually not big, |
| 33 | +// we currently just implements the basic greedy version(page 6 of the paper). |
| 34 | +template <typename Loc, typename Function, |
| 35 | + typename AnchorList = ArrayRef<std::pair<Loc, Function>>> |
| 36 | +void longestCommonSequence( |
| 37 | + AnchorList AnchorList1, AnchorList AnchorList2, |
| 38 | + llvm::function_ref<bool(const Function &, const Function &)> |
| 39 | + FunctionMatchesProfile, |
| 40 | + llvm::function_ref<void(Loc, Loc)> InsertMatching) { |
| 41 | + int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(), |
| 42 | + MaxDepth = Size1 + Size2; |
| 43 | + auto Index = [&](int32_t I) { return I + MaxDepth; }; |
| 44 | + |
| 45 | + if (MaxDepth == 0) |
| 46 | + return; |
| 47 | + |
| 48 | + // Backtrack the SES result. |
| 49 | + auto Backtrack = [&](ArrayRef<std::vector<int32_t>> Trace, |
| 50 | + AnchorList AnchorList1, AnchorList AnchorList2) { |
| 51 | + int32_t X = Size1, Y = Size2; |
| 52 | + for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) { |
| 53 | + const auto &P = Trace[Depth]; |
| 54 | + int32_t K = X - Y; |
| 55 | + int32_t PrevK = K; |
| 56 | + if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)])) |
| 57 | + PrevK = K + 1; |
| 58 | + else |
| 59 | + PrevK = K - 1; |
| 60 | + |
| 61 | + int32_t PrevX = P[Index(PrevK)]; |
| 62 | + int32_t PrevY = PrevX - PrevK; |
| 63 | + while (X > PrevX && Y > PrevY) { |
| 64 | + X--; |
| 65 | + Y--; |
| 66 | + InsertMatching(AnchorList1[X].first, AnchorList2[Y].first); |
| 67 | + } |
| 68 | + |
| 69 | + if (Depth == 0) |
| 70 | + break; |
| 71 | + |
| 72 | + if (Y == PrevY) |
| 73 | + X--; |
| 74 | + else if (X == PrevX) |
| 75 | + Y--; |
| 76 | + X = PrevX; |
| 77 | + Y = PrevY; |
| 78 | + } |
| 79 | + }; |
| 80 | + |
| 81 | + // The greedy LCS/SES algorithm. |
| 82 | + |
| 83 | + // An array contains the endpoints of the furthest reaching D-paths. |
| 84 | + std::vector<int32_t> V(2 * MaxDepth + 1, -1); |
| 85 | + V[Index(1)] = 0; |
| 86 | + // Trace is used to backtrack the SES result. |
| 87 | + std::vector<std::vector<int32_t>> Trace; |
| 88 | + for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) { |
| 89 | + Trace.push_back(V); |
| 90 | + for (int32_t K = -Depth; K <= Depth; K += 2) { |
| 91 | + int32_t X = 0, Y = 0; |
| 92 | + if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)])) |
| 93 | + X = V[Index(K + 1)]; |
| 94 | + else |
| 95 | + X = V[Index(K - 1)] + 1; |
| 96 | + Y = X - K; |
| 97 | + while ( |
| 98 | + X < Size1 && Y < Size2 && |
| 99 | + FunctionMatchesProfile(AnchorList1[X].second, AnchorList2[Y].second)) |
| 100 | + X++, Y++; |
| 101 | + |
| 102 | + V[Index(K)] = X; |
| 103 | + |
| 104 | + if (X >= Size1 && Y >= Size2) { |
| 105 | + // Length of an SES is D. |
| 106 | + Backtrack(Trace, AnchorList1, AnchorList2); |
| 107 | + return; |
| 108 | + } |
| 109 | + } |
| 110 | + } |
| 111 | + // Length of an SES is greater than MaxDepth. |
| 112 | +} |
| 113 | + |
| 114 | +} // end namespace llvm |
| 115 | + |
| 116 | +#endif // LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H |
0 commit comments