LLVM 17.0.0git
ProfiledCallGraph.h
Go to the documentation of this file.
1//===-- ProfiledCallGraph.h - Profiled Call Graph ----------------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
10#define LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
11
13#include "llvm/ADT/StringMap.h"
14#include "llvm/ADT/StringRef.h"
18#include <queue>
19#include <set>
20
21namespace llvm {
22namespace sampleprof {
23
24struct ProfiledCallGraphNode;
25
33
34 // The call destination is the only important data here,
35 // allow to transparently unwrap into it.
36 operator ProfiledCallGraphNode *() const { return Target; }
37};
38
40
41 // Sort edges by callee names only since all edges to be compared are from
42 // same caller. Edge weights are not considered either because for the same
43 // callee only the edge with the largest weight is added to the edge set.
46 const ProfiledCallGraphEdge &R) const {
47 return L.Target->Name < R.Target->Name;
48 }
49 };
50
52 using edges = std::set<edge, ProfiledCallGraphEdgeComparer>;
53 using iterator = edges::iterator;
54 using const_iterator = edges::const_iterator;
55
57
60};
61
63public:
65
66 // Constructor for non-CS profile.
68 uint64_t IgnoreColdCallThreshold = 0) {
70 "CS flat profile is not handled here");
71 for (const auto &Samples : ProfileMap) {
72 addProfiledCalls(Samples.second);
73 }
74
75 // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
76 // for a more stable call graph with "determinstic" edges from run to run.
77 trimColdEges(IgnoreColdCallThreshold);
78 }
79
80 // Constructor for CS profile.
82 uint64_t IgnoreColdCallThreshold = 0) {
83 // BFS traverse the context profile trie to add call edges for calls shown
84 // in context.
85 std::queue<ContextTrieNode *> Queue;
86 for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) {
87 ContextTrieNode *Callee = &Child.second;
89 Queue.push(Callee);
90 }
91
92 while (!Queue.empty()) {
93 ContextTrieNode *Caller = Queue.front();
94 Queue.pop();
95 FunctionSamples *CallerSamples = Caller->getFunctionSamples();
96
97 // Add calls for context.
98 // Note that callsite target samples are completely ignored since they can
99 // conflict with the context edges, which are formed by context
100 // compression during profile generation, for cyclic SCCs. This may
101 // further result in an SCC order incompatible with the purely
102 // context-based one, which may in turn block context-based inlining.
103 for (auto &Child : Caller->getAllChildContext()) {
104 ContextTrieNode *Callee = &Child.second;
106 Queue.push(Callee);
107
108 // Fetch edge weight from the profile.
109 uint64_t Weight;
110 FunctionSamples *CalleeSamples = Callee->getFunctionSamples();
111 if (!CalleeSamples || !CallerSamples) {
112 Weight = 0;
113 } else {
114 uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
115 uint64_t CallsiteCount = 0;
116 LineLocation Callsite = Callee->getCallSiteLoc();
117 if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
118 SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
119 auto It = TargetCounts.find(CalleeSamples->getName());
120 if (It != TargetCounts.end())
121 CallsiteCount = It->second;
122 }
123 Weight = std::max(CallsiteCount, CalleeEntryCount);
124 }
125
126 addProfiledCall(ContextTracker.getFuncNameFor(Caller),
127 ContextTracker.getFuncNameFor(Callee), Weight);
128 }
129 }
130
131 // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
132 // for a more stable call graph with "determinstic" edges from run to run.
133 trimColdEges(IgnoreColdCallThreshold);
134 }
135
136 iterator begin() { return Root.Edges.begin(); }
137 iterator end() { return Root.Edges.end(); }
139
141 if (!ProfiledFunctions.count(Name)) {
142 // Link to synthetic root to make sure every node is reachable
143 // from root. This does not affect SCC order.
144 ProfiledFunctions[Name] = ProfiledCallGraphNode(Name);
145 Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0);
146 }
147 }
148
149private:
150 void addProfiledCall(StringRef CallerName, StringRef CalleeName,
151 uint64_t Weight = 0) {
152 assert(ProfiledFunctions.count(CallerName));
153 auto CalleeIt = ProfiledFunctions.find(CalleeName);
154 if (CalleeIt == ProfiledFunctions.end())
155 return;
156 ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName],
157 &CalleeIt->second, Weight);
158 auto &Edges = ProfiledFunctions[CallerName].Edges;
159 auto EdgeIt = Edges.find(Edge);
160 if (EdgeIt == Edges.end()) {
161 Edges.insert(Edge);
162 } else {
163 // Accumulate weight to the existing edge.
164 Edge.Weight += EdgeIt->Weight;
165 Edges.erase(EdgeIt);
166 Edges.insert(Edge);
167 }
168 }
169
170 void addProfiledCalls(const FunctionSamples &Samples) {
171 addProfiledFunction(Samples.getFuncName());
172
173 for (const auto &Sample : Samples.getBodySamples()) {
174 for (const auto &[Target, Frequency] : Sample.second.getCallTargets()) {
175 addProfiledFunction(Target);
176 addProfiledCall(Samples.getFuncName(), Target, Frequency);
177 }
178 }
179
180 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
181 for (const auto &InlinedSamples : CallsiteSamples.second) {
182 addProfiledFunction(InlinedSamples.first);
183 addProfiledCall(Samples.getFuncName(), InlinedSamples.first,
184 InlinedSamples.second.getHeadSamplesEstimate());
185 addProfiledCalls(InlinedSamples.second);
186 }
187 }
188 }
189
190 // Trim edges with weight up to `Threshold`. Do not trim anything if
191 // `Threshold` is zero.
192 void trimColdEges(uint64_t Threshold = 0) {
193 if (!Threshold)
194 return;
195
196 for (auto &Node : ProfiledFunctions) {
197 auto &Edges = Node.second.Edges;
198 auto I = Edges.begin();
199 while (I != Edges.end()) {
200 if (I->Weight <= Threshold)
201 I = Edges.erase(I);
202 else
203 I++;
204 }
205 }
206 }
207
208 ProfiledCallGraphNode Root;
209 StringMap<ProfiledCallGraphNode> ProfiledFunctions;
210};
211
212} // end namespace sampleprof
213
214template <> struct GraphTraits<ProfiledCallGraphNode *> {
219
220 static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; }
221 static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); }
222 static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); }
223};
224
225template <>
229 return PCG->getEntryNode();
230 }
231
233 return PCG->begin();
234 }
235
237 return PCG->end();
238 }
239};
240
241} // end namespace llvm
242
243#endif
This file defines the StringMap class.
amdgpu Simplify well known AMD library false FunctionCallee Callee
std::string Name
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
std::map< uint64_t, ContextTrieNode > & getAllChildContext()
StringRef getFuncNameFor(ContextTrieNode *Node) const
iterator end()
Definition: StringMap.h:204
iterator find(StringRef Key)
Definition: StringMap.h:217
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Representation of the samples collected for a function.
Definition: SampleProf.h:734
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:869
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:929
StringRef getName() const
Return the function name.
Definition: SampleProf.h:1050
ProfiledCallGraphNode * getEntryNode()
ProfiledCallGraphNode::iterator iterator
ProfiledCallGraph(SampleContextTracker &ContextTracker, uint64_t IgnoreColdCallThreshold=0)
ProfiledCallGraph(SampleProfileMap &ProfileMap, uint64_t IgnoreColdCallThreshold=0)
std::unordered_map< SampleContext, FunctionSamples, SampleContext::Hash > SampleProfileMap
Definition: SampleProf.h:1271
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
#define N
static ChildIteratorType child_end(NodeRef N)
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(ProfiledCallGraph *PCG)
static ChildIteratorType nodes_end(ProfiledCallGraph *PCG)
static ChildIteratorType nodes_begin(ProfiledCallGraph *PCG)
Represents the relative location of an instruction.
Definition: SampleProf.h:289
ProfiledCallGraphEdge(ProfiledCallGraphNode *Source, ProfiledCallGraphNode *Target, uint64_t Weight)
bool operator()(const ProfiledCallGraphEdge &L, const ProfiledCallGraphEdge &R) const
ProfiledCallGraphNode(StringRef FName=StringRef())
std::set< edge, ProfiledCallGraphEdgeComparer > edges