LLVM 20.0.0git
LongestCommonSequence.h
Go to the documentation of this file.
1//===- LongestCommonSequence.h - Compute LCS --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements longestCommonSequence, useful for finding matches
10// between two sequences, such as lists of profiling points.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H
15#define LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H
16
17#include "llvm/ADT/ArrayRef.h"
18
19#include <cstdint>
20#include <vector>
21
22namespace llvm {
23
24// This function implements the Myers diff algorithm used for stale profile
25// matching. The algorithm provides a simple and efficient way to find the
26// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
27// sequences. For more details, refer to the paper 'An O(ND) Difference
28// Algorithm and Its Variations' by Eugene W. Myers.
29// In the scenario of profile fuzzy matching, the two sequences are the IR
30// callsite anchors and profile callsite anchors. The subsequence equivalent
31// parts from the resulting SES are used to remap the IR locations to the
32// profile locations. As the number of function callsite is usually not big,
33// we currently just implements the basic greedy version(page 6 of the paper).
34template <typename Loc, typename Function,
35 typename AnchorList = ArrayRef<std::pair<Loc, Function>>>
37 AnchorList AnchorList1, AnchorList AnchorList2,
38 llvm::function_ref<bool(const Function &, const Function &)>
39 FunctionMatchesProfile,
40 llvm::function_ref<void(Loc, Loc)> InsertMatching) {
41 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
42 MaxDepth = Size1 + Size2;
43 auto Index = [&](int32_t I) { return I + MaxDepth; };
44
45 if (MaxDepth == 0)
46 return;
47
48 // Backtrack the SES result.
49 auto Backtrack = [&](ArrayRef<std::vector<int32_t>> Trace,
50 AnchorList AnchorList1, AnchorList AnchorList2) {
51 int32_t X = Size1, Y = Size2;
52 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
53 const auto &P = Trace[Depth];
54 int32_t K = X - Y;
55 int32_t PrevK = K;
56 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
57 PrevK = K + 1;
58 else
59 PrevK = K - 1;
60
61 int32_t PrevX = P[Index(PrevK)];
62 int32_t PrevY = PrevX - PrevK;
63 while (X > PrevX && Y > PrevY) {
64 X--;
65 Y--;
66 InsertMatching(AnchorList1[X].first, AnchorList2[Y].first);
67 }
68
69 if (Depth == 0)
70 break;
71
72 if (Y == PrevY)
73 X--;
74 else if (X == PrevX)
75 Y--;
76 X = PrevX;
77 Y = PrevY;
78 }
79 };
80
81 // The greedy LCS/SES algorithm.
82
83 // An array contains the endpoints of the furthest reaching D-paths.
84 std::vector<int32_t> V(2 * MaxDepth + 1, -1);
85 V[Index(1)] = 0;
86 // Trace is used to backtrack the SES result.
87 std::vector<std::vector<int32_t>> Trace;
88 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
89 Trace.push_back(V);
90 for (int32_t K = -Depth; K <= Depth; K += 2) {
91 int32_t X = 0, Y = 0;
92 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
93 X = V[Index(K + 1)];
94 else
95 X = V[Index(K - 1)] + 1;
96 Y = X - K;
97 while (
98 X < Size1 && Y < Size2 &&
99 FunctionMatchesProfile(AnchorList1[X].second, AnchorList2[Y].second))
100 X++, Y++;
101
102 V[Index(K)] = X;
103
104 if (X >= Size1 && Y >= Size2) {
105 // Length of an SES is D.
106 Backtrack(Trace, AnchorList1, AnchorList2);
107 return;
108 }
109 }
110 }
111 // Length of an SES is greater than MaxDepth.
112}
113
114} // end namespace llvm
115
116#endif // LLVM_TRANSFORMS_UTILS_LONGESTCOMMONSEQEUNCE_H
uint32_t Index
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
unsigned size() const
Definition: Trace.h:95
An efficient, type-erasing, non-owning reference to a callable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)