LLVM 20.0.0git
ProfiledCallGraph.h
Go to the documentation of this file.
1//===-- ProfiledCallGraph.h - Profiled Call Graph ----------------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
10#define LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
11
16#include <queue>
17#include <set>
18
19namespace llvm {
20namespace sampleprof {
21
22struct ProfiledCallGraphNode;
23
31
32 // The call destination is the only important data here,
33 // allow to transparently unwrap into it.
34 operator ProfiledCallGraphNode *() const { return Target; }
35};
36
38
39 // Sort edges by callee names only since all edges to be compared are from
40 // same caller. Edge weights are not considered either because for the same
41 // callee only the edge with the largest weight is added to the edge set.
44 const ProfiledCallGraphEdge &R) const {
45 return L.Target->Name < R.Target->Name;
46 }
47 };
48
50 using edges = std::set<edge, ProfiledCallGraphEdgeComparer>;
51 using iterator = edges::iterator;
52 using const_iterator = edges::const_iterator;
53
55 {}
56
59};
60
62public:
64
65 // Constructor for non-CS profile.
67 uint64_t IgnoreColdCallThreshold = 0) {
69 "CS flat profile is not handled here");
70 for (const auto &Samples : ProfileMap) {
71 addProfiledCalls(Samples.second);
72 }
73
74 // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
75 // for a more stable call graph with "determinstic" edges from run to run.
76 trimColdEges(IgnoreColdCallThreshold);
77 }
78
79 // Constructor for CS profile.
81 uint64_t IgnoreColdCallThreshold = 0) {
82 // BFS traverse the context profile trie to add call edges for calls shown
83 // in context.
84 std::queue<ContextTrieNode *> Queue;
85 for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) {
86 ContextTrieNode *Callee = &Child.second;
87 addProfiledFunction(Callee->getFuncName());
88 Queue.push(Callee);
89 }
90
91 while (!Queue.empty()) {
92 ContextTrieNode *Caller = Queue.front();
93 Queue.pop();
94 FunctionSamples *CallerSamples = Caller->getFunctionSamples();
95
96 // Add calls for context.
97 // Note that callsite target samples are completely ignored since they can
98 // conflict with the context edges, which are formed by context
99 // compression during profile generation, for cyclic SCCs. This may
100 // further result in an SCC order incompatible with the purely
101 // context-based one, which may in turn block context-based inlining.
102 for (auto &Child : Caller->getAllChildContext()) {
103 ContextTrieNode *Callee = &Child.second;
104 addProfiledFunction(Callee->getFuncName());
105 Queue.push(Callee);
106
107 // Fetch edge weight from the profile.
108 uint64_t Weight;
109 FunctionSamples *CalleeSamples = Callee->getFunctionSamples();
110 if (!CalleeSamples || !CallerSamples) {
111 Weight = 0;
112 } else {
113 uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
114 uint64_t CallsiteCount = 0;
115 LineLocation Callsite = Callee->getCallSiteLoc();
116 if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
117 auto It = CallTargets->find(CalleeSamples->getFunction());
118 if (It != CallTargets->end())
119 CallsiteCount = It->second;
120 }
121 Weight = std::max(CallsiteCount, CalleeEntryCount);
122 }
123
124 addProfiledCall(Caller->getFuncName(), Callee->getFuncName(), Weight);
125 }
126 }
127
128 // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
129 // for a more stable call graph with "determinstic" edges from run to run.
130 trimColdEges(IgnoreColdCallThreshold);
131 }
132
133 iterator begin() { return Root.Edges.begin(); }
134 iterator end() { return Root.Edges.end(); }
136
138 if (!ProfiledFunctions.count(Name)) {
139 // Link to synthetic root to make sure every node is reachable
140 // from root. This does not affect SCC order.
141 // Store the pointer of the node because the map can be rehashed.
142 auto &Node =
143 ProfiledCallGraphNodeList.emplace_back(ProfiledCallGraphNode(Name));
144 ProfiledFunctions[Name] = &Node;
145 Root.Edges.emplace(&Root, ProfiledFunctions[Name], 0);
146 }
147 }
148
149private:
150 void addProfiledCall(FunctionId CallerName, FunctionId CalleeName,
151 uint64_t Weight = 0) {
152 assert(ProfiledFunctions.count(CallerName));
153 auto CalleeIt = ProfiledFunctions.find(CalleeName);
154 if (CalleeIt == ProfiledFunctions.end())
155 return;
156 ProfiledCallGraphEdge Edge(ProfiledFunctions[CallerName],
157 CalleeIt->second, Weight);
158 auto &Edges = ProfiledFunctions[CallerName]->Edges;
159 auto [EdgeIt, Inserted] = Edges.insert(Edge);
160 if (!Inserted) {
161 // Accumulate weight to the existing edge.
162 Edge.Weight += EdgeIt->Weight;
163 Edges.erase(EdgeIt);
164 Edges.insert(Edge);
165 }
166 }
167
168 void addProfiledCalls(const FunctionSamples &Samples) {
169 addProfiledFunction(Samples.getFunction());
170
171 for (const auto &Sample : Samples.getBodySamples()) {
172 for (const auto &[Target, Frequency] : Sample.second.getCallTargets()) {
173 addProfiledFunction(Target);
174 addProfiledCall(Samples.getFunction(), Target, Frequency);
175 }
176 }
177
178 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
179 for (const auto &InlinedSamples : CallsiteSamples.second) {
180 addProfiledFunction(InlinedSamples.first);
181 addProfiledCall(Samples.getFunction(), InlinedSamples.first,
182 InlinedSamples.second.getHeadSamplesEstimate());
183 addProfiledCalls(InlinedSamples.second);
184 }
185 }
186 }
187
188 // Trim edges with weight up to `Threshold`. Do not trim anything if
189 // `Threshold` is zero.
190 void trimColdEges(uint64_t Threshold = 0) {
191 if (!Threshold)
192 return;
193
194 for (auto &Node : ProfiledFunctions) {
195 auto &Edges = Node.second->Edges;
196 auto I = Edges.begin();
197 while (I != Edges.end()) {
198 if (I->Weight <= Threshold)
199 I = Edges.erase(I);
200 else
201 I++;
202 }
203 }
204 }
205
206 ProfiledCallGraphNode Root;
207 // backing buffer for ProfiledCallGraphNodes.
208 std::list<ProfiledCallGraphNode> ProfiledCallGraphNodeList;
209 HashKeyMap<llvm::DenseMap, FunctionId, ProfiledCallGraphNode*>
210 ProfiledFunctions;
211};
212
213} // end namespace sampleprof
214
215template <> struct GraphTraits<ProfiledCallGraphNode *> {
220
221 static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; }
222 static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); }
223 static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); }
224};
225
226template <>
230 return PCG->getEntryNode();
231 }
232
234 return PCG->begin();
235 }
236
238 return PCG->end();
239 }
240};
241
242} // end namespace llvm
243
244#endif
std::string Name
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
std::map< uint64_t, ContextTrieNode > & getAllChildContext()
Target - Wrapper for Target specific information.
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:745
FunctionId getFunction() const
Return the function name.
Definition: SampleProf.h:1074
ErrorOr< const SampleRecord::CallTargetMap & > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:887
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:949
ProfiledCallGraphNode * getEntryNode()
ProfiledCallGraphNode::iterator iterator
void addProfiledFunction(FunctionId Name)
ProfiledCallGraph(SampleContextTracker &ContextTracker, uint64_t IgnoreColdCallThreshold=0)
ProfiledCallGraph(SampleProfileMap &ProfileMap, uint64_t IgnoreColdCallThreshold=0)
This class provides operator overloads to the map container using MD5 as the key type,...
Definition: SampleProf.h:1313
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
#define N
static ChildIteratorType child_end(NodeRef N)
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(ProfiledCallGraph *PCG)
static ChildIteratorType nodes_end(ProfiledCallGraph *PCG)
static ChildIteratorType nodes_begin(ProfiledCallGraph *PCG)
Represents the relative location of an instruction.
Definition: SampleProf.h:280
ProfiledCallGraphEdge(ProfiledCallGraphNode *Source, ProfiledCallGraphNode *Target, uint64_t Weight)
bool operator()(const ProfiledCallGraphEdge &L, const ProfiledCallGraphEdge &R) const
ProfiledCallGraphNode(FunctionId FName=FunctionId())
std::set< edge, ProfiledCallGraphEdgeComparer > edges