File: | llvm/lib/Transforms/IPO/SampleProfile.cpp |
Warning: | line 1225, column 7 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements the SampleProfileLoader transformation. This pass | |||
10 | // reads a profile file generated by a sampling profiler (e.g. Linux Perf - | |||
11 | // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the | |||
12 | // profile information in the given profile. | |||
13 | // | |||
14 | // This pass generates branch weight annotations on the IR: | |||
15 | // | |||
16 | // - prof: Represents branch weights. This annotation is added to branches | |||
17 | // to indicate the weights of each edge coming out of the branch. | |||
18 | // The weight of each edge is the weight of the target block for | |||
19 | // that edge. The weight of a block B is computed as the maximum | |||
20 | // number of samples found in B. | |||
21 | // | |||
22 | //===----------------------------------------------------------------------===// | |||
23 | ||||
24 | #include "llvm/Transforms/IPO/SampleProfile.h" | |||
25 | #include "llvm/ADT/ArrayRef.h" | |||
26 | #include "llvm/ADT/DenseMap.h" | |||
27 | #include "llvm/ADT/DenseSet.h" | |||
28 | #include "llvm/ADT/None.h" | |||
29 | #include "llvm/ADT/PriorityQueue.h" | |||
30 | #include "llvm/ADT/SCCIterator.h" | |||
31 | #include "llvm/ADT/SmallPtrSet.h" | |||
32 | #include "llvm/ADT/SmallSet.h" | |||
33 | #include "llvm/ADT/SmallVector.h" | |||
34 | #include "llvm/ADT/Statistic.h" | |||
35 | #include "llvm/ADT/StringMap.h" | |||
36 | #include "llvm/ADT/StringRef.h" | |||
37 | #include "llvm/ADT/Twine.h" | |||
38 | #include "llvm/Analysis/AssumptionCache.h" | |||
39 | #include "llvm/Analysis/CallGraph.h" | |||
40 | #include "llvm/Analysis/CallGraphSCCPass.h" | |||
41 | #include "llvm/Analysis/InlineAdvisor.h" | |||
42 | #include "llvm/Analysis/InlineCost.h" | |||
43 | #include "llvm/Analysis/LoopInfo.h" | |||
44 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
45 | #include "llvm/Analysis/PostDominators.h" | |||
46 | #include "llvm/Analysis/ProfileSummaryInfo.h" | |||
47 | #include "llvm/Analysis/ReplayInlineAdvisor.h" | |||
48 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
49 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
50 | #include "llvm/IR/BasicBlock.h" | |||
51 | #include "llvm/IR/CFG.h" | |||
52 | #include "llvm/IR/DebugInfoMetadata.h" | |||
53 | #include "llvm/IR/DebugLoc.h" | |||
54 | #include "llvm/IR/DiagnosticInfo.h" | |||
55 | #include "llvm/IR/Dominators.h" | |||
56 | #include "llvm/IR/Function.h" | |||
57 | #include "llvm/IR/GlobalValue.h" | |||
58 | #include "llvm/IR/InstrTypes.h" | |||
59 | #include "llvm/IR/Instruction.h" | |||
60 | #include "llvm/IR/Instructions.h" | |||
61 | #include "llvm/IR/IntrinsicInst.h" | |||
62 | #include "llvm/IR/LLVMContext.h" | |||
63 | #include "llvm/IR/MDBuilder.h" | |||
64 | #include "llvm/IR/Module.h" | |||
65 | #include "llvm/IR/PassManager.h" | |||
66 | #include "llvm/IR/ValueSymbolTable.h" | |||
67 | #include "llvm/InitializePasses.h" | |||
68 | #include "llvm/Pass.h" | |||
69 | #include "llvm/ProfileData/InstrProf.h" | |||
70 | #include "llvm/ProfileData/SampleProf.h" | |||
71 | #include "llvm/ProfileData/SampleProfReader.h" | |||
72 | #include "llvm/Support/Casting.h" | |||
73 | #include "llvm/Support/CommandLine.h" | |||
74 | #include "llvm/Support/Debug.h" | |||
75 | #include "llvm/Support/ErrorHandling.h" | |||
76 | #include "llvm/Support/ErrorOr.h" | |||
77 | #include "llvm/Support/GenericDomTree.h" | |||
78 | #include "llvm/Support/raw_ostream.h" | |||
79 | #include "llvm/Transforms/IPO.h" | |||
80 | #include "llvm/Transforms/IPO/SampleContextTracker.h" | |||
81 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" | |||
82 | #include "llvm/Transforms/Instrumentation.h" | |||
83 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" | |||
84 | #include "llvm/Transforms/Utils/Cloning.h" | |||
85 | #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" | |||
86 | #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" | |||
87 | #include <algorithm> | |||
88 | #include <cassert> | |||
89 | #include <cstdint> | |||
90 | #include <functional> | |||
91 | #include <limits> | |||
92 | #include <map> | |||
93 | #include <memory> | |||
94 | #include <queue> | |||
95 | #include <string> | |||
96 | #include <system_error> | |||
97 | #include <utility> | |||
98 | #include <vector> | |||
99 | ||||
100 | using namespace llvm; | |||
101 | using namespace sampleprof; | |||
102 | using namespace llvm::sampleprofutil; | |||
103 | using ProfileCount = Function::ProfileCount; | |||
104 | #define DEBUG_TYPE"sample-profile" "sample-profile" | |||
105 | #define CSINLINE_DEBUG"sample-profile" "-inline" DEBUG_TYPE"sample-profile" "-inline" | |||
106 | ||||
107 | STATISTIC(NumCSInlined,static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined" , "Number of functions inlined with context sensitive profile" } | |||
108 | "Number of functions inlined with context sensitive profile")static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined" , "Number of functions inlined with context sensitive profile" }; | |||
109 | STATISTIC(NumCSNotInlined,static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined" , "Number of functions not inlined with context sensitive profile" } | |||
110 | "Number of functions not inlined with context sensitive profile")static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined" , "Number of functions not inlined with context sensitive profile" }; | |||
111 | STATISTIC(NumMismatchedProfile,static llvm::Statistic NumMismatchedProfile = {"sample-profile" , "NumMismatchedProfile", "Number of functions with CFG mismatched profile" } | |||
112 | "Number of functions with CFG mismatched profile")static llvm::Statistic NumMismatchedProfile = {"sample-profile" , "NumMismatchedProfile", "Number of functions with CFG mismatched profile" }; | |||
113 | STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile")static llvm::Statistic NumMatchedProfile = {"sample-profile", "NumMatchedProfile", "Number of functions with CFG matched profile" }; | |||
114 | STATISTIC(NumDuplicatedInlinesite,static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile" , "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor" } | |||
115 | "Number of inlined callsites with a partial distribution factor")static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile" , "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor" }; | |||
116 | ||||
117 | STATISTIC(NumCSInlinedHitMinLimit,static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile" , "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit" } | |||
118 | "Number of functions with FDO inline stopped due to min size limit")static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile" , "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit" }; | |||
119 | STATISTIC(NumCSInlinedHitMaxLimit,static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile" , "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit" } | |||
120 | "Number of functions with FDO inline stopped due to max size limit")static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile" , "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit" }; | |||
121 | STATISTIC(static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" } | |||
122 | NumCSInlinedHitGrowthLimit,static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" } | |||
123 | "Number of functions with FDO inline stopped due to growth size limit")static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" }; | |||
124 | ||||
125 | // Command line option to specify the file to read samples from. This is | |||
126 | // mainly used for debugging. | |||
127 | static cl::opt<std::string> SampleProfileFile( | |||
128 | "sample-profile-file", cl::init(""), cl::value_desc("filename"), | |||
129 | cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); | |||
130 | ||||
131 | // The named file contains a set of transformations that may have been applied | |||
132 | // to the symbol names between the program from which the sample data was | |||
133 | // collected and the current program's symbols. | |||
134 | static cl::opt<std::string> SampleProfileRemappingFile( | |||
135 | "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), | |||
136 | cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); | |||
137 | ||||
138 | static cl::opt<bool> ProfileSampleAccurate( | |||
139 | "profile-sample-accurate", cl::Hidden, cl::init(false), | |||
140 | cl::desc("If the sample profile is accurate, we will mark all un-sampled " | |||
141 | "callsite and function as having 0 samples. Otherwise, treat " | |||
142 | "un-sampled callsites and functions conservatively as unknown. ")); | |||
143 | ||||
144 | static cl::opt<bool> ProfileAccurateForSymsInList( | |||
145 | "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, | |||
146 | cl::init(true), | |||
147 | cl::desc("For symbols in profile symbol list, regard their profiles to " | |||
148 | "be accurate. It may be overriden by profile-sample-accurate. ")); | |||
149 | ||||
150 | static cl::opt<bool> ProfileMergeInlinee( | |||
151 | "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), | |||
152 | cl::desc("Merge past inlinee's profile to outline version if sample " | |||
153 | "profile loader decided not to inline a call site. It will " | |||
154 | "only be enabled when top-down order of profile loading is " | |||
155 | "enabled. ")); | |||
156 | ||||
157 | static cl::opt<bool> ProfileTopDownLoad( | |||
158 | "sample-profile-top-down-load", cl::Hidden, cl::init(true), | |||
159 | cl::desc("Do profile annotation and inlining for functions in top-down " | |||
160 | "order of call graph during sample profile loading. It only " | |||
161 | "works for new pass manager. ")); | |||
162 | ||||
163 | static cl::opt<bool> UseProfileIndirectCallEdges( | |||
164 | "use-profile-indirect-call-edges", cl::init(true), cl::Hidden, | |||
165 | cl::desc("Considering indirect call samples from profile when top-down " | |||
166 | "processing functions. Only CSSPGO is supported.")); | |||
167 | ||||
168 | static cl::opt<bool> UseProfileTopDownOrder( | |||
169 | "use-profile-top-down-order", cl::init(false), cl::Hidden, | |||
170 | cl::desc("Process functions in one SCC in a top-down order " | |||
171 | "based on the input profile.")); | |||
172 | ||||
173 | static cl::opt<bool> ProfileSizeInline( | |||
174 | "sample-profile-inline-size", cl::Hidden, cl::init(false), | |||
175 | cl::desc("Inline cold call sites in profile loader if it's beneficial " | |||
176 | "for code size.")); | |||
177 | ||||
178 | static cl::opt<int> ProfileInlineGrowthLimit( | |||
179 | "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), | |||
180 | cl::desc("The size growth ratio limit for proirity-based sample profile " | |||
181 | "loader inlining.")); | |||
182 | ||||
183 | static cl::opt<int> ProfileInlineLimitMin( | |||
184 | "sample-profile-inline-limit-min", cl::Hidden, cl::init(100), | |||
185 | cl::desc("The lower bound of size growth limit for " | |||
186 | "proirity-based sample profile loader inlining.")); | |||
187 | ||||
188 | static cl::opt<int> ProfileInlineLimitMax( | |||
189 | "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), | |||
190 | cl::desc("The upper bound of size growth limit for " | |||
191 | "proirity-based sample profile loader inlining.")); | |||
192 | ||||
193 | static cl::opt<int> ProfileICPThreshold( | |||
194 | "sample-profile-icp-threshold", cl::Hidden, cl::init(5), | |||
195 | cl::desc( | |||
196 | "Relative hotness threshold for indirect " | |||
197 | "call promotion in proirity-based sample profile loader inlining.")); | |||
198 | ||||
199 | static cl::opt<int> SampleHotCallSiteThreshold( | |||
200 | "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), | |||
201 | cl::desc("Hot callsite threshold for proirity-based sample profile loader " | |||
202 | "inlining.")); | |||
203 | ||||
204 | static cl::opt<bool> CallsitePrioritizedInline( | |||
205 | "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, | |||
206 | cl::init(false), | |||
207 | cl::desc("Use call site prioritized inlining for sample profile loader." | |||
208 | "Currently only CSSPGO is supported.")); | |||
209 | ||||
210 | static cl::opt<int> SampleColdCallSiteThreshold( | |||
211 | "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), | |||
212 | cl::desc("Threshold for inlining cold callsites")); | |||
213 | ||||
214 | static cl::opt<std::string> ProfileInlineReplayFile( | |||
215 | "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), | |||
216 | cl::desc( | |||
217 | "Optimization remarks file containing inline remarks to be replayed " | |||
218 | "by inlining from sample profile loader."), | |||
219 | cl::Hidden); | |||
220 | ||||
221 | extern cl::opt<unsigned> MaxNumPromotions; | |||
222 | ||||
223 | namespace { | |||
224 | ||||
225 | using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; | |||
226 | using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>; | |||
227 | using Edge = std::pair<const BasicBlock *, const BasicBlock *>; | |||
228 | using EdgeWeightMap = DenseMap<Edge, uint64_t>; | |||
229 | using BlockEdgeMap = | |||
230 | DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; | |||
231 | ||||
232 | class GUIDToFuncNameMapper { | |||
233 | public: | |||
234 | GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, | |||
235 | DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) | |||
236 | : CurrentReader(Reader), CurrentModule(M), | |||
237 | CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { | |||
238 | if (!CurrentReader.useMD5()) | |||
239 | return; | |||
240 | ||||
241 | for (const auto &F : CurrentModule) { | |||
242 | StringRef OrigName = F.getName(); | |||
243 | CurrentGUIDToFuncNameMap.insert( | |||
244 | {Function::getGUID(OrigName), OrigName}); | |||
245 | ||||
246 | // Local to global var promotion used by optimization like thinlto | |||
247 | // will rename the var and add suffix like ".llvm.xxx" to the | |||
248 | // original local name. In sample profile, the suffixes of function | |||
249 | // names are all stripped. Since it is possible that the mapper is | |||
250 | // built in post-thin-link phase and var promotion has been done, | |||
251 | // we need to add the substring of function name without the suffix | |||
252 | // into the GUIDToFuncNameMap. | |||
253 | StringRef CanonName = FunctionSamples::getCanonicalFnName(F); | |||
254 | if (CanonName != OrigName) | |||
255 | CurrentGUIDToFuncNameMap.insert( | |||
256 | {Function::getGUID(CanonName), CanonName}); | |||
257 | } | |||
258 | ||||
259 | // Update GUIDToFuncNameMap for each function including inlinees. | |||
260 | SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); | |||
261 | } | |||
262 | ||||
263 | ~GUIDToFuncNameMapper() { | |||
264 | if (!CurrentReader.useMD5()) | |||
265 | return; | |||
266 | ||||
267 | CurrentGUIDToFuncNameMap.clear(); | |||
268 | ||||
269 | // Reset GUIDToFuncNameMap for each function, as the map is no | |||
270 | // longer valid at this point. | |||
271 | SetGUIDToFuncNameMapForAll(nullptr); | |||
272 | } | |||
273 | ||||
274 | private: | |||
275 | void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) { | |||
276 | std::queue<FunctionSamples *> FSToUpdate; | |||
277 | for (auto &IFS : CurrentReader.getProfiles()) { | |||
278 | FSToUpdate.push(&IFS.second); | |||
279 | } | |||
280 | ||||
281 | while (!FSToUpdate.empty()) { | |||
282 | FunctionSamples *FS = FSToUpdate.front(); | |||
283 | FSToUpdate.pop(); | |||
284 | FS->GUIDToFuncNameMap = Map; | |||
285 | for (const auto &ICS : FS->getCallsiteSamples()) { | |||
286 | const FunctionSamplesMap &FSMap = ICS.second; | |||
287 | for (auto &IFS : FSMap) { | |||
288 | FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second); | |||
289 | FSToUpdate.push(&FS); | |||
290 | } | |||
291 | } | |||
292 | } | |||
293 | } | |||
294 | ||||
295 | SampleProfileReader &CurrentReader; | |||
296 | Module &CurrentModule; | |||
297 | DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap; | |||
298 | }; | |||
299 | ||||
300 | // Inline candidate used by iterative callsite prioritized inliner | |||
301 | struct InlineCandidate { | |||
302 | CallBase *CallInstr; | |||
303 | const FunctionSamples *CalleeSamples; | |||
304 | // Prorated callsite count, which will be used to guide inlining. For example, | |||
305 | // if a callsite is duplicated in LTO prelink, then in LTO postlink the two | |||
306 | // copies will get their own distribution factors and their prorated counts | |||
307 | // will be used to decide if they should be inlined independently. | |||
308 | uint64_t CallsiteCount; | |||
309 | // Call site distribution factor to prorate the profile samples for a | |||
310 | // duplicated callsite. Default value is 1.0. | |||
311 | float CallsiteDistribution; | |||
312 | }; | |||
313 | ||||
314 | // Inline candidate comparer using call site weight | |||
315 | struct CandidateComparer { | |||
316 | bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) { | |||
317 | if (LHS.CallsiteCount != RHS.CallsiteCount) | |||
318 | return LHS.CallsiteCount < RHS.CallsiteCount; | |||
319 | ||||
320 | // Tie breaker using GUID so we have stable/deterministic inlining order | |||
321 | assert(LHS.CalleeSamples && RHS.CalleeSamples &&((LHS.CalleeSamples && RHS.CalleeSamples && "Expect non-null FunctionSamples" ) ? static_cast<void> (0) : __assert_fail ("LHS.CalleeSamples && RHS.CalleeSamples && \"Expect non-null FunctionSamples\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 322, __PRETTY_FUNCTION__)) | |||
322 | "Expect non-null FunctionSamples")((LHS.CalleeSamples && RHS.CalleeSamples && "Expect non-null FunctionSamples" ) ? static_cast<void> (0) : __assert_fail ("LHS.CalleeSamples && RHS.CalleeSamples && \"Expect non-null FunctionSamples\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 322, __PRETTY_FUNCTION__)); | |||
323 | return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) < | |||
324 | RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName()); | |||
325 | } | |||
326 | }; | |||
327 | ||||
328 | using CandidateQueue = | |||
329 | PriorityQueue<InlineCandidate, std::vector<InlineCandidate>, | |||
330 | CandidateComparer>; | |||
331 | ||||
332 | /// Sample profile pass. | |||
333 | /// | |||
334 | /// This pass reads profile data from the file specified by | |||
335 | /// -sample-profile-file and annotates every affected function with the | |||
336 | /// profile information found in that file. | |||
337 | class SampleProfileLoader final | |||
338 | : public SampleProfileLoaderBaseImpl<BasicBlock> { | |||
339 | public: | |||
340 | SampleProfileLoader( | |||
341 | StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase, | |||
342 | std::function<AssumptionCache &(Function &)> GetAssumptionCache, | |||
343 | std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo, | |||
344 | std::function<const TargetLibraryInfo &(Function &)> GetTLI) | |||
345 | : SampleProfileLoaderBaseImpl(std::string(Name)), | |||
346 | GetAC(std::move(GetAssumptionCache)), | |||
347 | GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), | |||
348 | RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {} | |||
349 | ||||
350 | bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); | |||
351 | bool runOnModule(Module &M, ModuleAnalysisManager *AM, | |||
352 | ProfileSummaryInfo *_PSI, CallGraph *CG); | |||
353 | ||||
354 | protected: | |||
355 | bool runOnFunction(Function &F, ModuleAnalysisManager *AM); | |||
356 | bool emitAnnotations(Function &F); | |||
357 | ErrorOr<uint64_t> getInstWeight(const Instruction &I) override; | |||
358 | ErrorOr<uint64_t> getProbeWeight(const Instruction &I); | |||
359 | const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const; | |||
360 | const FunctionSamples * | |||
361 | findFunctionSamples(const Instruction &I) const override; | |||
362 | std::vector<const FunctionSamples *> | |||
363 | findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; | |||
364 | // Attempt to promote indirect call and also inline the promoted call | |||
365 | bool tryPromoteAndInlineCandidate( | |||
366 | Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, | |||
367 | uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns, | |||
368 | SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); | |||
369 | bool inlineHotFunctions(Function &F, | |||
370 | DenseSet<GlobalValue::GUID> &InlinedGUIDs); | |||
371 | InlineCost shouldInlineCandidate(InlineCandidate &Candidate); | |||
372 | bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); | |||
373 | bool | |||
374 | tryInlineCandidate(InlineCandidate &Candidate, | |||
375 | SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); | |||
376 | bool | |||
377 | inlineHotFunctionsWithPriority(Function &F, | |||
378 | DenseSet<GlobalValue::GUID> &InlinedGUIDs); | |||
379 | // Inline cold/small functions in addition to hot ones | |||
380 | bool shouldInlineColdCallee(CallBase &CallInst); | |||
381 | void emitOptimizationRemarksForInlineCandidates( | |||
382 | const SmallVectorImpl<CallBase *> &Candidates, const Function &F, | |||
383 | bool Hot); | |||
384 | std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); | |||
385 | void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples); | |||
386 | void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap); | |||
387 | void generateMDProfMetadata(Function &F); | |||
388 | ||||
389 | /// Map from function name to Function *. Used to find the function from | |||
390 | /// the function name. If the function name contains suffix, additional | |||
391 | /// entry is added to map from the stripped name to the function if there | |||
392 | /// is one-to-one mapping. | |||
393 | StringMap<Function *> SymbolMap; | |||
394 | ||||
395 | std::function<AssumptionCache &(Function &)> GetAC; | |||
396 | std::function<TargetTransformInfo &(Function &)> GetTTI; | |||
397 | std::function<const TargetLibraryInfo &(Function &)> GetTLI; | |||
398 | ||||
399 | /// Profile tracker for different context. | |||
400 | std::unique_ptr<SampleContextTracker> ContextTracker; | |||
401 | ||||
402 | /// Name of the profile remapping file to load. | |||
403 | std::string RemappingFilename; | |||
404 | ||||
405 | /// Flag indicating whether the profile input loaded successfully. | |||
406 | bool ProfileIsValid = false; | |||
407 | ||||
408 | /// Flag indicating whether input profile is context-sensitive | |||
409 | bool ProfileIsCS = false; | |||
410 | ||||
411 | /// Flag indicating which LTO/ThinLTO phase the pass is invoked in. | |||
412 | /// | |||
413 | /// We need to know the LTO phase because for example in ThinLTOPrelink | |||
414 | /// phase, in annotation, we should not promote indirect calls. Instead, | |||
415 | /// we will mark GUIDs that need to be annotated to the function. | |||
416 | ThinOrFullLTOPhase LTOPhase; | |||
417 | ||||
418 | /// Profile symbol list tells whether a function name appears in the binary | |||
419 | /// used to generate the current profile. | |||
420 | std::unique_ptr<ProfileSymbolList> PSL; | |||
421 | ||||
422 | /// Total number of samples collected in this profile. | |||
423 | /// | |||
424 | /// This is the sum of all the samples collected in all the functions executed | |||
425 | /// at runtime. | |||
426 | uint64_t TotalCollectedSamples = 0; | |||
427 | ||||
428 | // Information recorded when we declined to inline a call site | |||
429 | // because we have determined it is too cold is accumulated for | |||
430 | // each callee function. Initially this is just the entry count. | |||
431 | struct NotInlinedProfileInfo { | |||
432 | uint64_t entryCount; | |||
433 | }; | |||
434 | DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; | |||
435 | ||||
436 | // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for | |||
437 | // all the function symbols defined or declared in current module. | |||
438 | DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; | |||
439 | ||||
440 | // All the Names used in FunctionSamples including outline function | |||
441 | // names, inline instance names and call target names. | |||
442 | StringSet<> NamesInProfile; | |||
443 | ||||
444 | // For symbol in profile symbol list, whether to regard their profiles | |||
445 | // to be accurate. It is mainly decided by the existence of a profile symbol | |||
446 | // list and the -profile-accurate-for-symsinlist flag, but it can be | |||
447 | // overridden by -profile-sample-accurate or the profile-sample-accurate | |||
448 | // attribute. | |||
449 | bool ProfAccForSymsInList; | |||
450 | ||||
451 | // External inline advisor used to replay inline decision from remarks. | |||
452 | std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor; | |||
453 | ||||
454 | // A pseudo probe helper to correlate the imported sample counts. | |||
455 | std::unique_ptr<PseudoProbeManager> ProbeManager; | |||
456 | }; | |||
457 | ||||
458 | class SampleProfileLoaderLegacyPass : public ModulePass { | |||
459 | public: | |||
460 | // Class identification, replacement for typeinfo | |||
461 | static char ID; | |||
462 | ||||
463 | SampleProfileLoaderLegacyPass( | |||
464 | StringRef Name = SampleProfileFile, | |||
465 | ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) | |||
466 | : ModulePass(ID), SampleLoader( | |||
467 | Name, SampleProfileRemappingFile, LTOPhase, | |||
468 | [&](Function &F) -> AssumptionCache & { | |||
469 | return ACT->getAssumptionCache(F); | |||
470 | }, | |||
471 | [&](Function &F) -> TargetTransformInfo & { | |||
472 | return TTIWP->getTTI(F); | |||
473 | }, | |||
474 | [&](Function &F) -> TargetLibraryInfo & { | |||
475 | return TLIWP->getTLI(F); | |||
476 | }) { | |||
477 | initializeSampleProfileLoaderLegacyPassPass( | |||
478 | *PassRegistry::getPassRegistry()); | |||
479 | } | |||
480 | ||||
481 | void dump() { SampleLoader.dump(); } | |||
482 | ||||
483 | bool doInitialization(Module &M) override { | |||
484 | return SampleLoader.doInitialization(M); | |||
485 | } | |||
486 | ||||
487 | StringRef getPassName() const override { return "Sample profile pass"; } | |||
488 | bool runOnModule(Module &M) override; | |||
489 | ||||
490 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
491 | AU.addRequired<AssumptionCacheTracker>(); | |||
492 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
493 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
494 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); | |||
495 | } | |||
496 | ||||
497 | private: | |||
498 | SampleProfileLoader SampleLoader; | |||
499 | AssumptionCacheTracker *ACT = nullptr; | |||
500 | TargetTransformInfoWrapperPass *TTIWP = nullptr; | |||
501 | TargetLibraryInfoWrapperPass *TLIWP = nullptr; | |||
502 | }; | |||
503 | ||||
504 | } // end anonymous namespace | |||
505 | ||||
506 | ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { | |||
507 | if (FunctionSamples::ProfileIsProbeBased) | |||
508 | return getProbeWeight(Inst); | |||
509 | ||||
510 | const DebugLoc &DLoc = Inst.getDebugLoc(); | |||
511 | if (!DLoc) | |||
512 | return std::error_code(); | |||
513 | ||||
514 | // Ignore all intrinsics, phinodes and branch instructions. | |||
515 | // Branch and PHI node instructions usually contain debug info from sources | |||
516 | // outside of the residing basic block, thus we ignore them during annotation. | |||
517 | if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst)) | |||
518 | return std::error_code(); | |||
519 | ||||
520 | // If a direct call/invoke instruction is inlined in profile | |||
521 | // (findCalleeFunctionSamples returns non-empty result), but not inlined here, | |||
522 | // it means that the inlined callsite has no sample, thus the call | |||
523 | // instruction should have 0 count. | |||
524 | if (!ProfileIsCS) | |||
525 | if (const auto *CB = dyn_cast<CallBase>(&Inst)) | |||
526 | if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) | |||
527 | return 0; | |||
528 | ||||
529 | return getInstWeightImpl(Inst); | |||
530 | } | |||
531 | ||||
532 | ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { | |||
533 | assert(FunctionSamples::ProfileIsProbeBased &&((FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based" ) ? static_cast<void> (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 534, __PRETTY_FUNCTION__)) | |||
534 | "Profile is not pseudo probe based")((FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based" ) ? static_cast<void> (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 534, __PRETTY_FUNCTION__)); | |||
535 | Optional<PseudoProbe> Probe = extractProbe(Inst); | |||
536 | if (!Probe) | |||
537 | return std::error_code(); | |||
538 | ||||
539 | // Ignore dangling probes since they are logically deleted and should not | |||
540 | // consume any profile samples. | |||
541 | if (Probe->isDangling()) | |||
542 | return std::error_code(); | |||
543 | ||||
544 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
545 | if (!FS) | |||
546 | return std::error_code(); | |||
547 | ||||
548 | // If a direct call/invoke instruction is inlined in profile | |||
549 | // (findCalleeFunctionSamples returns non-empty result), but not inlined here, | |||
550 | // it means that the inlined callsite has no sample, thus the call | |||
551 | // instruction should have 0 count. | |||
552 | if (const auto *CB = dyn_cast<CallBase>(&Inst)) | |||
553 | if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) | |||
554 | return 0; | |||
555 | ||||
556 | const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0); | |||
557 | if (R) { | |||
558 | uint64_t Samples = R.get() * Probe->Factor; | |||
559 | bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); | |||
560 | if (FirstMark) { | |||
561 | ORE->emit([&]() { | |||
562 | OptimizationRemarkAnalysis Remark(DEBUG_TYPE"sample-profile", "AppliedSamples", &Inst); | |||
563 | Remark << "Applied " << ore::NV("NumSamples", Samples); | |||
564 | Remark << " samples from profile (ProbeId="; | |||
565 | Remark << ore::NV("ProbeId", Probe->Id); | |||
566 | Remark << ", Factor="; | |||
567 | Remark << ore::NV("Factor", Probe->Factor); | |||
568 | Remark << ", OriginalSamples="; | |||
569 | Remark << ore::NV("OriginalSamples", R.get()); | |||
570 | Remark << ")"; | |||
571 | return Remark; | |||
572 | }); | |||
573 | } | |||
574 | LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Instdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false) | |||
575 | << " - weight: " << R.get() << " - factor: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false) | |||
576 | << format("%0.2f", Probe->Factor) << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false); | |||
577 | return Samples; | |||
578 | } | |||
579 | return R; | |||
580 | } | |||
581 | ||||
582 | /// Get the FunctionSamples for a call instruction. | |||
583 | /// | |||
584 | /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined | |||
585 | /// instance in which that call instruction is calling to. It contains | |||
586 | /// all samples that resides in the inlined instance. We first find the | |||
587 | /// inlined instance in which the call instruction is from, then we | |||
588 | /// traverse its children to find the callsite with the matching | |||
589 | /// location. | |||
590 | /// | |||
591 | /// \param Inst Call/Invoke instruction to query. | |||
592 | /// | |||
593 | /// \returns The FunctionSamples pointer to the inlined instance. | |||
594 | const FunctionSamples * | |||
595 | SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { | |||
596 | const DILocation *DIL = Inst.getDebugLoc(); | |||
597 | if (!DIL) { | |||
598 | return nullptr; | |||
599 | } | |||
600 | ||||
601 | StringRef CalleeName; | |||
602 | if (Function *Callee = Inst.getCalledFunction()) | |||
603 | CalleeName = FunctionSamples::getCanonicalFnName(*Callee); | |||
604 | ||||
605 | if (ProfileIsCS) | |||
606 | return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); | |||
607 | ||||
608 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
609 | if (FS == nullptr) | |||
610 | return nullptr; | |||
611 | ||||
612 | return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL), | |||
613 | CalleeName, Reader->getRemapper()); | |||
614 | } | |||
615 | ||||
616 | /// Returns a vector of FunctionSamples that are the indirect call targets | |||
617 | /// of \p Inst. The vector is sorted by the total number of samples. Stores | |||
618 | /// the total call count of the indirect call in \p Sum. | |||
619 | std::vector<const FunctionSamples *> | |||
620 | SampleProfileLoader::findIndirectCallFunctionSamples( | |||
621 | const Instruction &Inst, uint64_t &Sum) const { | |||
622 | const DILocation *DIL = Inst.getDebugLoc(); | |||
623 | std::vector<const FunctionSamples *> R; | |||
624 | ||||
625 | if (!DIL) { | |||
626 | return R; | |||
627 | } | |||
628 | ||||
629 | auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { | |||
630 | assert(L && R && "Expect non-null FunctionSamples")((L && R && "Expect non-null FunctionSamples" ) ? static_cast<void> (0) : __assert_fail ("L && R && \"Expect non-null FunctionSamples\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 630, __PRETTY_FUNCTION__)); | |||
631 | if (L->getEntrySamples() != R->getEntrySamples()) | |||
632 | return L->getEntrySamples() > R->getEntrySamples(); | |||
633 | return FunctionSamples::getGUID(L->getName()) < | |||
634 | FunctionSamples::getGUID(R->getName()); | |||
635 | }; | |||
636 | ||||
637 | if (ProfileIsCS) { | |||
638 | auto CalleeSamples = | |||
639 | ContextTracker->getIndirectCalleeContextSamplesFor(DIL); | |||
640 | if (CalleeSamples.empty()) | |||
641 | return R; | |||
642 | ||||
643 | // For CSSPGO, we only use target context profile's entry count | |||
644 | // as that already includes both inlined callee and non-inlined ones.. | |||
645 | Sum = 0; | |||
646 | for (const auto *const FS : CalleeSamples) { | |||
647 | Sum += FS->getEntrySamples(); | |||
648 | R.push_back(FS); | |||
649 | } | |||
650 | llvm::sort(R, FSCompare); | |||
651 | return R; | |||
652 | } | |||
653 | ||||
654 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
655 | if (FS == nullptr) | |||
656 | return R; | |||
657 | ||||
658 | auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); | |||
659 | auto T = FS->findCallTargetMapAt(CallSite); | |||
660 | Sum = 0; | |||
661 | if (T) | |||
662 | for (const auto &T_C : T.get()) | |||
663 | Sum += T_C.second; | |||
664 | if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { | |||
665 | if (M->empty()) | |||
666 | return R; | |||
667 | for (const auto &NameFS : *M) { | |||
668 | Sum += NameFS.second.getEntrySamples(); | |||
669 | R.push_back(&NameFS.second); | |||
670 | } | |||
671 | llvm::sort(R, FSCompare); | |||
672 | } | |||
673 | return R; | |||
674 | } | |||
675 | ||||
676 | const FunctionSamples * | |||
677 | SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { | |||
678 | if (FunctionSamples::ProfileIsProbeBased) { | |||
679 | Optional<PseudoProbe> Probe = extractProbe(Inst); | |||
680 | if (!Probe) | |||
681 | return nullptr; | |||
682 | } | |||
683 | ||||
684 | const DILocation *DIL = Inst.getDebugLoc(); | |||
685 | if (!DIL) | |||
686 | return Samples; | |||
687 | ||||
688 | auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); | |||
689 | if (it.second) { | |||
690 | if (ProfileIsCS) | |||
691 | it.first->second = ContextTracker->getContextSamplesFor(DIL); | |||
692 | else | |||
693 | it.first->second = | |||
694 | Samples->findFunctionSamples(DIL, Reader->getRemapper()); | |||
695 | } | |||
696 | return it.first->second; | |||
697 | } | |||
698 | ||||
699 | /// If the profile count for the promotion candidate \p Candidate is 0, | |||
700 | /// it means \p Candidate has already been promoted for \p Inst. | |||
701 | static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) { | |||
702 | uint32_t NumVals = 0; | |||
703 | uint64_t TotalCount = 0; | |||
704 | std::unique_ptr<InstrProfValueData[]> ValueData = | |||
705 | std::make_unique<InstrProfValueData[]>(MaxNumPromotions); | |||
706 | bool Valid = | |||
707 | getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, | |||
708 | ValueData.get(), NumVals, TotalCount, true); | |||
709 | if (Valid) { | |||
710 | for (uint32_t I = 0; I < NumVals; I++) { | |||
711 | // If the promotion candidate has 0 count in the metadata, it | |||
712 | // means the candidate has been promoted for this indirect call. | |||
713 | if (ValueData[I].Value == Function::getGUID(Candidate)) | |||
714 | return ValueData[I].Count == 0; | |||
715 | } | |||
716 | } | |||
717 | return false; | |||
718 | } | |||
719 | ||||
720 | /// Update indirect call target profile metadata for \p Inst. If \p Total | |||
721 | /// is given, set TotalCount of call targets counts to \p Total, otherwise | |||
722 | /// keep the original value in metadata. | |||
723 | static void | |||
724 | updateIDTMetaData(Instruction &Inst, | |||
725 | const SmallVectorImpl<InstrProfValueData> &CallTargets, | |||
726 | uint64_t Total = 0) { | |||
727 | DenseMap<uint64_t, uint64_t> ValueCountMap; | |||
728 | ||||
729 | uint32_t NumVals = 0; | |||
730 | uint64_t TotalCount = 0; | |||
731 | std::unique_ptr<InstrProfValueData[]> ValueData = | |||
732 | std::make_unique<InstrProfValueData[]>(MaxNumPromotions); | |||
733 | bool Valid = | |||
734 | getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, | |||
735 | ValueData.get(), NumVals, TotalCount, true); | |||
736 | if (Valid) { | |||
737 | for (uint32_t I = 0; I < NumVals; I++) | |||
738 | ValueCountMap[ValueData[I].Value] = ValueData[I].Count; | |||
739 | } | |||
740 | ||||
741 | for (const auto &Data : CallTargets) { | |||
742 | auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); | |||
743 | if (Pair.second) | |||
744 | continue; | |||
745 | // Update existing profile count of the call target if it is not 0. | |||
746 | // If it is 0, the call target has been promoted so keep it as 0. | |||
747 | if (Pair.first->second != 0) | |||
748 | Pair.first->second = Data.Count; | |||
749 | else { | |||
750 | assert(Total >= Data.Count && "Total should be >= Data.Count")((Total >= Data.Count && "Total should be >= Data.Count" ) ? static_cast<void> (0) : __assert_fail ("Total >= Data.Count && \"Total should be >= Data.Count\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 750, __PRETTY_FUNCTION__)); | |||
751 | Total -= Data.Count; | |||
752 | } | |||
753 | } | |||
754 | ||||
755 | SmallVector<InstrProfValueData, 8> NewCallTargets; | |||
756 | for (const auto &ValueCount : ValueCountMap) { | |||
757 | NewCallTargets.emplace_back( | |||
758 | InstrProfValueData{ValueCount.first, ValueCount.second}); | |||
759 | } | |||
760 | llvm::sort(NewCallTargets, | |||
761 | [](const InstrProfValueData &L, const InstrProfValueData &R) { | |||
762 | if (L.Count != R.Count) | |||
763 | return L.Count > R.Count; | |||
764 | return L.Value > R.Value; | |||
765 | }); | |||
766 | annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst, | |||
767 | NewCallTargets, Total ? Total : TotalCount, | |||
768 | IPVK_IndirectCallTarget, NewCallTargets.size()); | |||
769 | } | |||
770 | ||||
771 | /// Attempt to promote indirect call and also inline the promoted call. | |||
772 | /// | |||
773 | /// \param F Caller function. | |||
774 | /// \param Candidate ICP and inline candidate. | |||
775 | /// \param Sum Sum of target counts for indirect call. | |||
776 | /// \param PromotedInsns Map to keep track of indirect call already processed. | |||
777 | /// \param InlinedCallSite Output vector for new call sites exposed after | |||
778 | /// inlining. | |||
779 | bool SampleProfileLoader::tryPromoteAndInlineCandidate( | |||
780 | Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, | |||
781 | DenseSet<Instruction *> &PromotedInsns, | |||
782 | SmallVector<CallBase *, 8> *InlinedCallSite) { | |||
783 | auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName(); | |||
784 | auto R = SymbolMap.find(CalleeFunctionName); | |||
785 | if (R == SymbolMap.end() || !R->getValue()) | |||
786 | return false; | |||
787 | ||||
788 | auto &CI = *Candidate.CallInstr; | |||
789 | if (isPromotedBefore(CI, R->getValue()->getName())) | |||
790 | return false; | |||
791 | ||||
792 | const char *Reason = "Callee function not available"; | |||
793 | // R->getValue() != &F is to prevent promoting a recursive call. | |||
794 | // If it is a recursive call, we do not inline it as it could bloat | |||
795 | // the code exponentially. There is way to better handle this, e.g. | |||
796 | // clone the caller first, and inline the cloned caller if it is | |||
797 | // recursive. As llvm does not inline recursive calls, we will | |||
798 | // simply ignore it instead of handling it explicitly. | |||
799 | if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && | |||
800 | R->getValue()->hasFnAttribute("use-sample-profile") && | |||
801 | R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) { | |||
802 | // For promoted target, save 0 count in the value profile metadata so | |||
803 | // the target won't be promoted again. | |||
804 | SmallVector<InstrProfValueData, 1> SortedCallTargets = { | |||
805 | InstrProfValueData{Function::getGUID(R->getValue()->getName()), 0}}; | |||
806 | updateIDTMetaData(CI, SortedCallTargets); | |||
807 | ||||
808 | auto *DI = &pgo::promoteIndirectCall( | |||
809 | CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE); | |||
810 | if (DI) { | |||
811 | Sum -= Candidate.CallsiteCount; | |||
812 | // Prorate the indirect callsite distribution. | |||
813 | // Do not update the promoted direct callsite distribution at this | |||
814 | // point since the original distribution combined with the callee | |||
815 | // profile will be used to prorate callsites from the callee if | |||
816 | // inlined. Once not inlined, the direct callsite distribution should | |||
817 | // be prorated so that the it will reflect the real callsite counts. | |||
818 | setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum / | |||
819 | SumOrigin); | |||
820 | PromotedInsns.insert(Candidate.CallInstr); | |||
821 | Candidate.CallInstr = DI; | |||
822 | if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) { | |||
823 | bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); | |||
824 | if (!Inlined) { | |||
825 | // Prorate the direct callsite distribution so that it reflects real | |||
826 | // callsite counts. | |||
827 | setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution * | |||
828 | Candidate.CallsiteCount / | |||
829 | SumOrigin); | |||
830 | } | |||
831 | return Inlined; | |||
832 | } | |||
833 | } | |||
834 | } else { | |||
835 | LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false) | |||
836 | << Candidate.CalleeSamples->getFuncName() << " because "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false) | |||
837 | << Reason << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false); | |||
838 | } | |||
839 | return false; | |||
840 | } | |||
841 | ||||
842 | bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { | |||
843 | if (!ProfileSizeInline) | |||
844 | return false; | |||
845 | ||||
846 | Function *Callee = CallInst.getCalledFunction(); | |||
847 | if (Callee == nullptr) | |||
848 | return false; | |||
849 | ||||
850 | InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), | |||
851 | GetAC, GetTLI); | |||
852 | ||||
853 | if (Cost.isNever()) | |||
854 | return false; | |||
855 | ||||
856 | if (Cost.isAlways()) | |||
857 | return true; | |||
858 | ||||
859 | return Cost.getCost() <= SampleColdCallSiteThreshold; | |||
860 | } | |||
861 | ||||
862 | void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( | |||
863 | const SmallVectorImpl<CallBase *> &Candidates, const Function &F, | |||
864 | bool Hot) { | |||
865 | for (auto I : Candidates) { | |||
866 | Function *CalledFunction = I->getCalledFunction(); | |||
867 | if (CalledFunction) { | |||
868 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineAttempt", | |||
869 | I->getDebugLoc(), I->getParent()) | |||
870 | << "previous inlining reattempted for " | |||
871 | << (Hot ? "hotness: '" : "size: '") | |||
872 | << ore::NV("Callee", CalledFunction) << "' into '" | |||
873 | << ore::NV("Caller", &F) << "'"); | |||
874 | } | |||
875 | } | |||
876 | } | |||
877 | ||||
878 | /// Iteratively inline hot callsites of a function. | |||
879 | /// | |||
880 | /// Iteratively traverse all callsites of the function \p F, and find if | |||
881 | /// the corresponding inlined instance exists and is hot in profile. If | |||
882 | /// it is hot enough, inline the callsites and adds new callsites of the | |||
883 | /// callee into the caller. If the call is an indirect call, first promote | |||
884 | /// it to direct call. Each indirect call is limited with a single target. | |||
885 | /// | |||
886 | /// \param F function to perform iterative inlining. | |||
887 | /// \param InlinedGUIDs a set to be updated to include all GUIDs that are | |||
888 | /// inlined in the profiled binary. | |||
889 | /// | |||
890 | /// \returns True if there is any inline happened. | |||
891 | bool SampleProfileLoader::inlineHotFunctions( | |||
892 | Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { | |||
893 | DenseSet<Instruction *> PromotedInsns; | |||
894 | ||||
895 | // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure | |||
896 | // Profile symbol list is ignored when profile-sample-accurate is on. | |||
897 | assert((!ProfAccForSymsInList ||(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 901, __PRETTY_FUNCTION__)) | |||
898 | (!ProfileSampleAccurate &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 901, __PRETTY_FUNCTION__)) | |||
899 | !F.hasFnAttribute("profile-sample-accurate"))) &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 901, __PRETTY_FUNCTION__)) | |||
900 | "ProfAccForSymsInList should be false when profile-sample-accurate "(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 901, __PRETTY_FUNCTION__)) | |||
901 | "is enabled")(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 901, __PRETTY_FUNCTION__)); | |||
902 | ||||
903 | DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; | |||
904 | bool Changed = false; | |||
905 | bool LocalChanged = true; | |||
906 | while (LocalChanged) { | |||
907 | LocalChanged = false; | |||
908 | SmallVector<CallBase *, 10> CIS; | |||
909 | for (auto &BB : F) { | |||
910 | bool Hot = false; | |||
911 | SmallVector<CallBase *, 10> AllCandidates; | |||
912 | SmallVector<CallBase *, 10> ColdCandidates; | |||
913 | for (auto &I : BB.getInstList()) { | |||
914 | const FunctionSamples *FS = nullptr; | |||
915 | if (auto *CB = dyn_cast<CallBase>(&I)) { | |||
916 | if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) { | |||
917 | assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&(((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated") ? static_cast<void > (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 918, __PRETTY_FUNCTION__)) | |||
918 | "GUIDToFuncNameMap has to be populated")(((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated") ? static_cast<void > (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 918, __PRETTY_FUNCTION__)); | |||
919 | AllCandidates.push_back(CB); | |||
920 | if (FS->getEntrySamples() > 0 || ProfileIsCS) | |||
921 | LocalNotInlinedCallSites.try_emplace(CB, FS); | |||
922 | if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) | |||
923 | Hot = true; | |||
924 | else if (shouldInlineColdCallee(*CB)) | |||
925 | ColdCandidates.push_back(CB); | |||
926 | } | |||
927 | } | |||
928 | } | |||
929 | if (Hot || ExternalInlineAdvisor) { | |||
930 | CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); | |||
931 | emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); | |||
932 | } else { | |||
933 | CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end()); | |||
934 | emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false); | |||
935 | } | |||
936 | } | |||
937 | for (CallBase *I : CIS) { | |||
938 | Function *CalledFunction = I->getCalledFunction(); | |||
939 | InlineCandidate Candidate = { | |||
940 | I, | |||
941 | LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] | |||
942 | : nullptr, | |||
943 | 0 /* dummy count */, 1.0 /* dummy distribution factor */}; | |||
944 | // Do not inline recursive calls. | |||
945 | if (CalledFunction == &F) | |||
946 | continue; | |||
947 | if (I->isIndirectCall()) { | |||
948 | if (PromotedInsns.count(I)) | |||
949 | continue; | |||
950 | uint64_t Sum; | |||
951 | for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { | |||
952 | uint64_t SumOrigin = Sum; | |||
953 | if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
954 | FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), | |||
955 | PSI->getOrCompHotCountThreshold()); | |||
956 | continue; | |||
957 | } | |||
958 | if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) | |||
959 | continue; | |||
960 | ||||
961 | Candidate = {I, FS, FS->getEntrySamples(), 1.0}; | |||
962 | if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, | |||
963 | PromotedInsns)) { | |||
964 | LocalNotInlinedCallSites.erase(I); | |||
965 | LocalChanged = true; | |||
966 | } | |||
967 | } | |||
968 | } else if (CalledFunction && CalledFunction->getSubprogram() && | |||
969 | !CalledFunction->isDeclaration()) { | |||
970 | if (tryInlineCandidate(Candidate)) { | |||
971 | LocalNotInlinedCallSites.erase(I); | |||
972 | LocalChanged = true; | |||
973 | } | |||
974 | } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
975 | findCalleeFunctionSamples(*I)->findInlinedFunctions( | |||
976 | InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); | |||
977 | } | |||
978 | } | |||
979 | Changed |= LocalChanged; | |||
980 | } | |||
981 | ||||
982 | // For CS profile, profile for not inlined context will be merged when | |||
983 | // base profile is being trieved | |||
984 | if (ProfileIsCS) | |||
985 | return Changed; | |||
986 | ||||
987 | // Accumulate not inlined callsite information into notInlinedSamples | |||
988 | for (const auto &Pair : LocalNotInlinedCallSites) { | |||
989 | CallBase *I = Pair.getFirst(); | |||
990 | Function *Callee = I->getCalledFunction(); | |||
991 | if (!Callee || Callee->isDeclaration()) | |||
992 | continue; | |||
993 | ||||
994 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "NotInline", | |||
995 | I->getDebugLoc(), I->getParent()) | |||
996 | << "previous inlining not repeated: '" | |||
997 | << ore::NV("Callee", Callee) << "' into '" | |||
998 | << ore::NV("Caller", &F) << "'"); | |||
999 | ||||
1000 | ++NumCSNotInlined; | |||
1001 | const FunctionSamples *FS = Pair.getSecond(); | |||
1002 | if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { | |||
1003 | continue; | |||
1004 | } | |||
1005 | ||||
1006 | if (ProfileMergeInlinee) { | |||
1007 | // A function call can be replicated by optimizations like callsite | |||
1008 | // splitting or jump threading and the replicates end up sharing the | |||
1009 | // sample nested callee profile instead of slicing the original inlinee's | |||
1010 | // profile. We want to do merge exactly once by filtering out callee | |||
1011 | // profiles with a non-zero head sample count. | |||
1012 | if (FS->getHeadSamples() == 0) { | |||
1013 | // Use entry samples as head samples during the merge, as inlinees | |||
1014 | // don't have head samples. | |||
1015 | const_cast<FunctionSamples *>(FS)->addHeadSamples( | |||
1016 | FS->getEntrySamples()); | |||
1017 | ||||
1018 | // Note that we have to do the merge right after processing function. | |||
1019 | // This allows OutlineFS's profile to be used for annotation during | |||
1020 | // top-down processing of functions' annotation. | |||
1021 | FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); | |||
1022 | OutlineFS->merge(*FS); | |||
1023 | } | |||
1024 | } else { | |||
1025 | auto pair = | |||
1026 | notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); | |||
1027 | pair.first->second.entryCount += FS->getEntrySamples(); | |||
1028 | } | |||
1029 | } | |||
1030 | return Changed; | |||
1031 | } | |||
1032 | ||||
1033 | bool SampleProfileLoader::tryInlineCandidate( | |||
1034 | InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) { | |||
1035 | ||||
1036 | CallBase &CB = *Candidate.CallInstr; | |||
1037 | Function *CalledFunction = CB.getCalledFunction(); | |||
1038 | assert(CalledFunction && "Expect a callee with definition")((CalledFunction && "Expect a callee with definition" ) ? static_cast<void> (0) : __assert_fail ("CalledFunction && \"Expect a callee with definition\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1038, __PRETTY_FUNCTION__)); | |||
1039 | DebugLoc DLoc = CB.getDebugLoc(); | |||
1040 | BasicBlock *BB = CB.getParent(); | |||
1041 | ||||
1042 | InlineCost Cost = shouldInlineCandidate(Candidate); | |||
1043 | if (Cost.isNever()) { | |||
1044 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineFail", DLoc, BB) | |||
1045 | << "incompatible inlining"); | |||
1046 | return false; | |||
1047 | } | |||
1048 | ||||
1049 | if (!Cost) | |||
1050 | return false; | |||
1051 | ||||
1052 | InlineFunctionInfo IFI(nullptr, GetAC); | |||
1053 | if (InlineFunction(CB, IFI).isSuccess()) { | |||
1054 | // The call to InlineFunction erases I, so we can't pass it here. | |||
1055 | emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, | |||
1056 | true, CSINLINE_DEBUG"sample-profile" "-inline"); | |||
1057 | ||||
1058 | // Now populate the list of newly exposed call sites. | |||
1059 | if (InlinedCallSites) { | |||
1060 | InlinedCallSites->clear(); | |||
1061 | for (auto &I : IFI.InlinedCallSites) | |||
1062 | InlinedCallSites->push_back(I); | |||
1063 | } | |||
1064 | ||||
1065 | if (ProfileIsCS) | |||
1066 | ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); | |||
1067 | ++NumCSInlined; | |||
1068 | ||||
1069 | // Prorate inlined probes for a duplicated inlining callsite which probably | |||
1070 | // has a distribution less than 100%. Samples for an inlinee should be | |||
1071 | // distributed among the copies of the original callsite based on each | |||
1072 | // callsite's distribution factor for counts accuracy. Note that an inlined | |||
1073 | // probe may come with its own distribution factor if it has been duplicated | |||
1074 | // in the inlinee body. The two factor are multiplied to reflect the | |||
1075 | // aggregation of duplication. | |||
1076 | if (Candidate.CallsiteDistribution < 1) { | |||
1077 | for (auto &I : IFI.InlinedCallSites) { | |||
1078 | if (Optional<PseudoProbe> Probe = extractProbe(*I)) | |||
1079 | setProbeDistributionFactor(*I, Probe->Factor * | |||
1080 | Candidate.CallsiteDistribution); | |||
1081 | } | |||
1082 | NumDuplicatedInlinesite++; | |||
1083 | } | |||
1084 | ||||
1085 | return true; | |||
1086 | } | |||
1087 | return false; | |||
1088 | } | |||
1089 | ||||
1090 | bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, | |||
1091 | CallBase *CB) { | |||
1092 | assert(CB && "Expect non-null call instruction")((CB && "Expect non-null call instruction") ? static_cast <void> (0) : __assert_fail ("CB && \"Expect non-null call instruction\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1092, __PRETTY_FUNCTION__)); | |||
1093 | ||||
1094 | if (isa<IntrinsicInst>(CB)) | |||
1095 | return false; | |||
1096 | ||||
1097 | // Find the callee's profile. For indirect call, find hottest target profile. | |||
1098 | const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB); | |||
1099 | if (!CalleeSamples) | |||
1100 | return false; | |||
1101 | ||||
1102 | float Factor = 1.0; | |||
1103 | if (Optional<PseudoProbe> Probe = extractProbe(*CB)) | |||
1104 | Factor = Probe->Factor; | |||
1105 | ||||
1106 | uint64_t CallsiteCount = 0; | |||
1107 | ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent()); | |||
1108 | if (Weight) | |||
1109 | CallsiteCount = Weight.get(); | |||
1110 | if (CalleeSamples) | |||
1111 | CallsiteCount = std::max( | |||
1112 | CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); | |||
1113 | ||||
1114 | *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; | |||
1115 | return true; | |||
1116 | } | |||
1117 | ||||
1118 | InlineCost | |||
1119 | SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { | |||
1120 | std::unique_ptr<InlineAdvice> Advice = nullptr; | |||
1121 | if (ExternalInlineAdvisor) { | |||
1122 | Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); | |||
1123 | if (!Advice->isInliningRecommended()) { | |||
1124 | Advice->recordUnattemptedInlining(); | |||
1125 | return InlineCost::getNever("not previously inlined"); | |||
1126 | } | |||
1127 | Advice->recordInlining(); | |||
1128 | return InlineCost::getAlways("previously inlined"); | |||
1129 | } | |||
1130 | ||||
1131 | // Adjust threshold based on call site hotness, only do this for callsite | |||
1132 | // prioritized inliner because otherwise cost-benefit check is done earlier. | |||
1133 | int SampleThreshold = SampleColdCallSiteThreshold; | |||
1134 | if (CallsitePrioritizedInline) { | |||
1135 | if (Candidate.CallsiteCount > PSI->getHotCountThreshold()) | |||
1136 | SampleThreshold = SampleHotCallSiteThreshold; | |||
1137 | else if (!ProfileSizeInline) | |||
1138 | return InlineCost::getNever("cold callsite"); | |||
1139 | } | |||
1140 | ||||
1141 | Function *Callee = Candidate.CallInstr->getCalledFunction(); | |||
1142 | assert(Callee && "Expect a definition for inline candidate of direct call")((Callee && "Expect a definition for inline candidate of direct call" ) ? static_cast<void> (0) : __assert_fail ("Callee && \"Expect a definition for inline candidate of direct call\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1142, __PRETTY_FUNCTION__)); | |||
1143 | ||||
1144 | InlineParams Params = getInlineParams(); | |||
1145 | Params.ComputeFullInlineCost = true; | |||
1146 | // Checks if there is anything in the reachable portion of the callee at | |||
1147 | // this callsite that makes this inlining potentially illegal. Need to | |||
1148 | // set ComputeFullInlineCost, otherwise getInlineCost may return early | |||
1149 | // when cost exceeds threshold without checking all IRs in the callee. | |||
1150 | // The acutal cost does not matter because we only checks isNever() to | |||
1151 | // see if it is legal to inline the callsite. | |||
1152 | InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, | |||
1153 | GetTTI(*Callee), GetAC, GetTLI); | |||
1154 | ||||
1155 | // Honor always inline and never inline from call analyzer | |||
1156 | if (Cost.isNever() || Cost.isAlways()) | |||
1157 | return Cost; | |||
1158 | ||||
1159 | // For old FDO inliner, we inline the call site as long as cost is not | |||
1160 | // "Never". The cost-benefit check is done earlier. | |||
1161 | if (!CallsitePrioritizedInline) { | |||
1162 | return InlineCost::get(Cost.getCost(), INT_MAX2147483647); | |||
1163 | } | |||
1164 | ||||
1165 | // Otherwise only use the cost from call analyzer, but overwite threshold with | |||
1166 | // Sample PGO threshold. | |||
1167 | return InlineCost::get(Cost.getCost(), SampleThreshold); | |||
1168 | } | |||
1169 | ||||
1170 | bool SampleProfileLoader::inlineHotFunctionsWithPriority( | |||
1171 | Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { | |||
1172 | DenseSet<Instruction *> PromotedInsns; | |||
1173 | assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now")((ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now" ) ? static_cast<void> (0) : __assert_fail ("ProfileIsCS && \"Prioritiy based inliner only works with CSSPGO now\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1173, __PRETTY_FUNCTION__)); | |||
| ||||
1174 | ||||
1175 | // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure | |||
1176 | // Profile symbol list is ignored when profile-sample-accurate is on. | |||
1177 | assert((!ProfAccForSymsInList ||(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1181, __PRETTY_FUNCTION__)) | |||
1178 | (!ProfileSampleAccurate &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1181, __PRETTY_FUNCTION__)) | |||
1179 | !F.hasFnAttribute("profile-sample-accurate"))) &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1181, __PRETTY_FUNCTION__)) | |||
1180 | "ProfAccForSymsInList should be false when profile-sample-accurate "(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1181, __PRETTY_FUNCTION__)) | |||
1181 | "is enabled")(((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? static_cast<void> (0) : __assert_fail ( "(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1181, __PRETTY_FUNCTION__)); | |||
1182 | ||||
1183 | // Populating worklist with initial call sites from root inliner, along | |||
1184 | // with call site weights. | |||
1185 | CandidateQueue CQueue; | |||
1186 | InlineCandidate NewCandidate; | |||
1187 | for (auto &BB : F) { | |||
1188 | for (auto &I : BB.getInstList()) { | |||
1189 | auto *CB = dyn_cast<CallBase>(&I); | |||
1190 | if (!CB) | |||
1191 | continue; | |||
1192 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1193 | CQueue.push(NewCandidate); | |||
1194 | } | |||
1195 | } | |||
1196 | ||||
1197 | // Cap the size growth from profile guided inlining. This is needed even | |||
1198 | // though cost of each inline candidate already accounts for callee size, | |||
1199 | // because with top-down inlining, we can grow inliner size significantly | |||
1200 | // with large number of smaller inlinees each pass the cost check. | |||
1201 | assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&((ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1203, __PRETTY_FUNCTION__)) | |||
1202 | "Max inline size limit should not be smaller than min inline size "((ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1203, __PRETTY_FUNCTION__)) | |||
1203 | "limit.")((ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1203, __PRETTY_FUNCTION__)); | |||
1204 | unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit; | |||
1205 | SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); | |||
1206 | SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); | |||
1207 | if (ExternalInlineAdvisor) | |||
1208 | SizeLimit = std::numeric_limits<unsigned>::max(); | |||
1209 | ||||
1210 | // Perform iterative BFS call site prioritized inlining | |||
1211 | bool Changed = false; | |||
1212 | while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { | |||
1213 | InlineCandidate Candidate = CQueue.top(); | |||
1214 | CQueue.pop(); | |||
1215 | CallBase *I = Candidate.CallInstr; | |||
1216 | Function *CalledFunction = I->getCalledFunction(); | |||
1217 | ||||
1218 | if (CalledFunction == &F) | |||
1219 | continue; | |||
1220 | if (I->isIndirectCall()) { | |||
1221 | if (PromotedInsns.count(I)) | |||
1222 | continue; | |||
1223 | uint64_t Sum; | |||
1224 | auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); | |||
1225 | uint64_t SumOrigin = Sum; | |||
| ||||
1226 | Sum *= Candidate.CallsiteDistribution; | |||
1227 | for (const auto *FS : CalleeSamples) { | |||
1228 | // TODO: Consider disable pre-lTO ICP for MonoLTO as well | |||
1229 | if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1230 | FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), | |||
1231 | PSI->getOrCompHotCountThreshold()); | |||
1232 | continue; | |||
1233 | } | |||
1234 | uint64_t EntryCountDistributed = | |||
1235 | FS->getEntrySamples() * Candidate.CallsiteDistribution; | |||
1236 | // In addition to regular inline cost check, we also need to make sure | |||
1237 | // ICP isn't introducing excessive speculative checks even if individual | |||
1238 | // target looks beneficial to promote and inline. That means we should | |||
1239 | // only do ICP when there's a small number dominant targets. | |||
1240 | if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) | |||
1241 | break; | |||
1242 | // TODO: Fix CallAnalyzer to handle all indirect calls. | |||
1243 | // For indirect call, we don't run CallAnalyzer to get InlineCost | |||
1244 | // before actual inlining. This is because we could see two different | |||
1245 | // types from the same definition, which makes CallAnalyzer choke as | |||
1246 | // it's expecting matching parameter type on both caller and callee | |||
1247 | // side. See example from PR18962 for the triggering cases (the bug was | |||
1248 | // fixed, but we generate different types). | |||
1249 | if (!PSI->isHotCount(EntryCountDistributed)) | |||
1250 | break; | |||
1251 | SmallVector<CallBase *, 8> InlinedCallSites; | |||
1252 | // Attach function profile for promoted indirect callee, and update | |||
1253 | // call site count for the promoted inline candidate too. | |||
1254 | Candidate = {I, FS, EntryCountDistributed, | |||
1255 | Candidate.CallsiteDistribution}; | |||
1256 | if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, | |||
1257 | PromotedInsns, &InlinedCallSites)) { | |||
1258 | for (auto *CB : InlinedCallSites) { | |||
1259 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1260 | CQueue.emplace(NewCandidate); | |||
1261 | } | |||
1262 | Changed = true; | |||
1263 | } | |||
1264 | } | |||
1265 | } else if (CalledFunction && CalledFunction->getSubprogram() && | |||
1266 | !CalledFunction->isDeclaration()) { | |||
1267 | SmallVector<CallBase *, 8> InlinedCallSites; | |||
1268 | if (tryInlineCandidate(Candidate, &InlinedCallSites)) { | |||
1269 | for (auto *CB : InlinedCallSites) { | |||
1270 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1271 | CQueue.emplace(NewCandidate); | |||
1272 | } | |||
1273 | Changed = true; | |||
1274 | } | |||
1275 | } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1276 | findCalleeFunctionSamples(*I)->findInlinedFunctions( | |||
1277 | InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); | |||
1278 | } | |||
1279 | } | |||
1280 | ||||
1281 | if (!CQueue.empty()) { | |||
1282 | if (SizeLimit == (unsigned)ProfileInlineLimitMax) | |||
1283 | ++NumCSInlinedHitMaxLimit; | |||
1284 | else if (SizeLimit == (unsigned)ProfileInlineLimitMin) | |||
1285 | ++NumCSInlinedHitMinLimit; | |||
1286 | else | |||
1287 | ++NumCSInlinedHitGrowthLimit; | |||
1288 | } | |||
1289 | ||||
1290 | return Changed; | |||
1291 | } | |||
1292 | ||||
1293 | /// Returns the sorted CallTargetMap \p M by count in descending order. | |||
1294 | static SmallVector<InstrProfValueData, 2> | |||
1295 | GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) { | |||
1296 | SmallVector<InstrProfValueData, 2> R; | |||
1297 | for (const auto &I : SampleRecord::SortCallTargets(M)) { | |||
1298 | R.emplace_back( | |||
1299 | InstrProfValueData{FunctionSamples::getGUID(I.first), I.second}); | |||
1300 | } | |||
1301 | return R; | |||
1302 | } | |||
1303 | ||||
1304 | // Generate MD_prof metadata for every branch instruction using the | |||
1305 | // edge weights computed during propagation. | |||
1306 | void SampleProfileLoader::generateMDProfMetadata(Function &F) { | |||
1307 | // Generate MD_prof metadata for every branch instruction using the | |||
1308 | // edge weights computed during propagation. | |||
1309 | LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nPropagation complete. Setting branch weights\n" ; } } while (false); | |||
1310 | LLVMContext &Ctx = F.getContext(); | |||
1311 | MDBuilder MDB(Ctx); | |||
1312 | for (auto &BI : F) { | |||
1313 | BasicBlock *BB = &BI; | |||
1314 | ||||
1315 | if (BlockWeights[BB]) { | |||
1316 | for (auto &I : BB->getInstList()) { | |||
1317 | if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) | |||
1318 | continue; | |||
1319 | if (!cast<CallBase>(I).getCalledFunction()) { | |||
1320 | const DebugLoc &DLoc = I.getDebugLoc(); | |||
1321 | if (!DLoc) | |||
1322 | continue; | |||
1323 | const DILocation *DIL = DLoc; | |||
1324 | const FunctionSamples *FS = findFunctionSamples(I); | |||
1325 | if (!FS) | |||
1326 | continue; | |||
1327 | auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); | |||
1328 | auto T = FS->findCallTargetMapAt(CallSite); | |||
1329 | if (!T || T.get().empty()) | |||
1330 | continue; | |||
1331 | // Prorate the callsite counts to reflect what is already done to the | |||
1332 | // callsite, such as ICP or calliste cloning. | |||
1333 | if (FunctionSamples::ProfileIsProbeBased) { | |||
1334 | if (Optional<PseudoProbe> Probe = extractProbe(I)) { | |||
1335 | if (Probe->Factor < 1) | |||
1336 | T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); | |||
1337 | } | |||
1338 | } | |||
1339 | SmallVector<InstrProfValueData, 2> SortedCallTargets = | |||
1340 | GetSortedValueDataFromCallTargets(T.get()); | |||
1341 | uint64_t Sum = 0; | |||
1342 | for (const auto &C : T.get()) | |||
1343 | Sum += C.second; | |||
1344 | // With CSSPGO all indirect call targets are counted torwards the | |||
1345 | // original indirect call site in the profile, including both | |||
1346 | // inlined and non-inlined targets. | |||
1347 | if (!FunctionSamples::ProfileIsCS) { | |||
1348 | if (const FunctionSamplesMap *M = | |||
1349 | FS->findFunctionSamplesMapAt(CallSite)) { | |||
1350 | for (const auto &NameFS : *M) | |||
1351 | Sum += NameFS.second.getEntrySamples(); | |||
1352 | } | |||
1353 | } | |||
1354 | updateIDTMetaData(I, SortedCallTargets, Sum); | |||
1355 | } else if (!isa<IntrinsicInst>(&I)) { | |||
1356 | I.setMetadata(LLVMContext::MD_prof, | |||
1357 | MDB.createBranchWeights( | |||
1358 | {static_cast<uint32_t>(BlockWeights[BB])})); | |||
1359 | } | |||
1360 | } | |||
1361 | } | |||
1362 | Instruction *TI = BB->getTerminator(); | |||
1363 | if (TI->getNumSuccessors() == 1) | |||
1364 | continue; | |||
1365 | if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) | |||
1366 | continue; | |||
1367 | ||||
1368 | DebugLoc BranchLoc = TI->getDebugLoc(); | |||
1369 | LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1370 | << ((BranchLoc) ? Twine(BranchLoc.getLine())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1371 | : Twine("<UNKNOWN LOCATION>"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1372 | << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false); | |||
1373 | SmallVector<uint32_t, 4> Weights; | |||
1374 | uint32_t MaxWeight = 0; | |||
1375 | Instruction *MaxDestInst; | |||
1376 | for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { | |||
1377 | BasicBlock *Succ = TI->getSuccessor(I); | |||
1378 | Edge E = std::make_pair(BB, Succ); | |||
1379 | uint64_t Weight = EdgeWeights[E]; | |||
1380 | LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\t"; printEdgeWeight(dbgs (), E); } } while (false); | |||
1381 | // Use uint32_t saturated arithmetic to adjust the incoming weights, | |||
1382 | // if needed. Sample counts in profiles are 64-bit unsigned values, | |||
1383 | // but internally branch weights are expressed as 32-bit values. | |||
1384 | if (Weight > std::numeric_limits<uint32_t>::max()) { | |||
1385 | LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " (saturated due to uint32_t overflow)" ; } } while (false); | |||
1386 | Weight = std::numeric_limits<uint32_t>::max(); | |||
1387 | } | |||
1388 | // Weight is added by one to avoid propagation errors introduced by | |||
1389 | // 0 weights. | |||
1390 | Weights.push_back(static_cast<uint32_t>(Weight + 1)); | |||
1391 | if (Weight != 0) { | |||
1392 | if (Weight > MaxWeight) { | |||
1393 | MaxWeight = Weight; | |||
1394 | MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime(); | |||
1395 | } | |||
1396 | } | |||
1397 | } | |||
1398 | ||||
1399 | uint64_t TempWeight; | |||
1400 | // Only set weights if there is at least one non-zero weight. | |||
1401 | // In any other case, let the analyzer set weights. | |||
1402 | // Do not set weights if the weights are present. In ThinLTO, the profile | |||
1403 | // annotation is done twice. If the first annotation already set the | |||
1404 | // weights, the second pass does not need to set it. | |||
1405 | if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) { | |||
1406 | LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "SUCCESS. Found non-zero weights.\n" ; } } while (false); | |||
1407 | TI->setMetadata(LLVMContext::MD_prof, | |||
1408 | MDB.createBranchWeights(Weights)); | |||
1409 | ORE->emit([&]() { | |||
1410 | return OptimizationRemark(DEBUG_TYPE"sample-profile", "PopularDest", MaxDestInst) | |||
1411 | << "most popular destination for conditional branches at " | |||
1412 | << ore::NV("CondBranchesLoc", BranchLoc); | |||
1413 | }); | |||
1414 | } else { | |||
1415 | LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "SKIPPED. All branch weights are zero.\n" ; } } while (false); | |||
1416 | } | |||
1417 | } | |||
1418 | } | |||
1419 | ||||
1420 | /// Once all the branch weights are computed, we emit the MD_prof | |||
1421 | /// metadata on BB using the computed values for each of its branches. | |||
1422 | /// | |||
1423 | /// \param F The function to query. | |||
1424 | /// | |||
1425 | /// \returns true if \p F was modified. Returns false, otherwise. | |||
1426 | bool SampleProfileLoader::emitAnnotations(Function &F) { | |||
1427 | bool Changed = false; | |||
1428 | ||||
1429 | if (FunctionSamples::ProfileIsProbeBased) { | |||
1430 | if (!ProbeManager->profileIsValid(F, *Samples)) { | |||
1431 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false) | |||
1432 | dbgs() << "Profile is invalid due to CFG mismatch for Function "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false) | |||
1433 | << F.getName())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false); | |||
1434 | ++NumMismatchedProfile; | |||
1435 | return false; | |||
1436 | } | |||
1437 | ++NumMatchedProfile; | |||
1438 | } else { | |||
1439 | if (getFunctionLoc(F) == 0) | |||
1440 | return false; | |||
1441 | ||||
1442 | LLVM_DEBUG(dbgs() << "Line number for the first instruction in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F ) << "\n"; } } while (false) | |||
1443 | << F.getName() << ": " << getFunctionLoc(F) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F ) << "\n"; } } while (false); | |||
1444 | } | |||
1445 | ||||
1446 | DenseSet<GlobalValue::GUID> InlinedGUIDs; | |||
1447 | if (ProfileIsCS && CallsitePrioritizedInline) | |||
1448 | Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); | |||
1449 | else | |||
1450 | Changed |= inlineHotFunctions(F, InlinedGUIDs); | |||
1451 | ||||
1452 | Changed |= computeAndPropagateWeights(F, InlinedGUIDs); | |||
1453 | ||||
1454 | if (Changed) | |||
1455 | generateMDProfMetadata(F); | |||
1456 | ||||
1457 | emitCoverageRemarks(F); | |||
1458 | return Changed; | |||
1459 | } | |||
1460 | ||||
// Identifier of the legacy pass; its address is what the registration below
// hands to PassInfo.
1461 | char SampleProfileLoaderLegacyPass::ID = 0; | |||
1462 | ||||
// Register the legacy pass under the command-line name "sample-profile" and
// initialize the passes it depends on: AssumptionCacheTracker,
// TargetTransformInfoWrapperPass, TargetLibraryInfoWrapperPass and
// ProfileSummaryInfoWrapperPass.
1463 | INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",static void *initializeSampleProfileLoaderLegacyPassPassOnce( PassRegistry &Registry) { | |||
1464 | "Sample Profile loader", false, false)static void *initializeSampleProfileLoaderLegacyPassPassOnce( PassRegistry &Registry) { | |||
1465 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | |||
1466 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | |||
1467 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
1468 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry); | |||
1469 | INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile" , &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SampleProfileLoaderLegacyPass>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag ; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag , initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry )); } | |||
1470 | "Sample Profile loader", false, false)PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile" , &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SampleProfileLoaderLegacyPass>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag ; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag , initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry )); } | |||
1471 | ||||
1472 | // Add inlined profile call edges to the call graph. | |||
1473 | void SampleProfileLoader::addCallGraphEdges(CallGraph &CG, | |||
1474 | const FunctionSamples &Samples) { | |||
1475 | Function *Caller = SymbolMap.lookup(Samples.getFuncName()); | |||
1476 | if (!Caller || Caller->isDeclaration()) | |||
1477 | return; | |||
1478 | ||||
1479 | // Skip non-inlined call edges which are not important since top down inlining | |||
1480 | // for non-CS profile is to get more precise profile matching, not to enable | |||
1481 | // more inlining. | |||
1482 | ||||
1483 | for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { | |||
1484 | for (const auto &InlinedSamples : CallsiteSamples.second) { | |||
1485 | Function *Callee = SymbolMap.lookup(InlinedSamples.first); | |||
1486 | if (Callee && !Callee->isDeclaration()) | |||
1487 | CG[Caller]->addCalledFunction(nullptr, CG[Callee]); | |||
1488 | addCallGraphEdges(CG, InlinedSamples.second); | |||
1489 | } | |||
1490 | } | |||
1491 | } | |||
1492 | ||||
1493 | // Replace call graph edges with dynamic call edges from the profile. | |||
1494 | void SampleProfileLoader::replaceCallGraphEdges( | |||
1495 | CallGraph &CG, StringMap<Function *> &SymbolMap) { | |||
1496 | // Remove static call edges from the call graph except for the ones from the | |||
1497 | // root which make the call graph connected. | |||
1498 | for (const auto &Node : CG) | |||
1499 | if (Node.second.get() != CG.getExternalCallingNode()) | |||
1500 | Node.second->removeAllCalledFunctions(); | |||
1501 | ||||
1502 | // Add profile call edges to the call graph. | |||
1503 | if (ProfileIsCS) { | |||
1504 | ContextTracker->addCallGraphEdges(CG, SymbolMap); | |||
1505 | } else { | |||
1506 | for (const auto &Samples : Reader->getProfiles()) | |||
1507 | addCallGraphEdges(CG, Samples.second); | |||
1508 | } | |||
1509 | } | |||
1510 | ||||
1511 | std::vector<Function *> | |||
1512 | SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { | |||
1513 | std::vector<Function *> FunctionOrderList; | |||
1514 | FunctionOrderList.reserve(M.size()); | |||
1515 | ||||
1516 | if (!ProfileTopDownLoad || CG == nullptr) { | |||
1517 | if (ProfileMergeInlinee) { | |||
1518 | // Disable ProfileMergeInlinee if profile is not loaded in top down order, | |||
1519 | // because the profile for a function may be used for the profile | |||
1520 | // annotation of its outline copy before the profile merging of its | |||
1521 | // non-inlined inline instances, and that is not the way how | |||
1522 | // ProfileMergeInlinee is supposed to work. | |||
1523 | ProfileMergeInlinee = false; | |||
1524 | } | |||
1525 | ||||
1526 | for (Function &F : M) | |||
1527 | if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) | |||
1528 | FunctionOrderList.push_back(&F); | |||
1529 | return FunctionOrderList; | |||
1530 | } | |||
1531 | ||||
1532 | assert(&CG->getModule() == &M)((&CG->getModule() == &M) ? static_cast<void> (0) : __assert_fail ("&CG->getModule() == &M", "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1532, __PRETTY_FUNCTION__)); | |||
1533 | ||||
1534 | // Add indirect call edges from profile to augment the static call graph. | |||
1535 | // Functions will be processed in a top-down order defined by the static call | |||
1536 | // graph. Adjusting the order by considering indirect call edges from the | |||
1537 | // profile (which don't exist in the static call graph) can enable the | |||
1538 | // inlining of indirect call targets by processing the caller before them. | |||
1539 | // TODO: enable this for non-CS profile and fix the counts returning logic to | |||
1540 | // have a full support for indirect calls. | |||
1541 | if (UseProfileIndirectCallEdges && ProfileIsCS) { | |||
1542 | for (auto &Entry : *CG) { | |||
1543 | const auto *F = Entry.first; | |||
1544 | if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) | |||
1545 | continue; | |||
1546 | auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName()); | |||
1547 | if (AllContexts.empty()) | |||
1548 | continue; | |||
1549 | ||||
1550 | for (const auto &BB : *F) { | |||
1551 | for (const auto &I : BB.getInstList()) { | |||
1552 | const auto *CB = dyn_cast<CallBase>(&I); | |||
1553 | if (!CB || !CB->isIndirectCall()) | |||
1554 | continue; | |||
1555 | const DebugLoc &DLoc = I.getDebugLoc(); | |||
1556 | if (!DLoc) | |||
1557 | continue; | |||
1558 | auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc); | |||
1559 | for (FunctionSamples *Samples : AllContexts) { | |||
1560 | if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) { | |||
1561 | for (const auto &Target : CallTargets.get()) { | |||
1562 | Function *Callee = SymbolMap.lookup(Target.first()); | |||
1563 | if (Callee && !Callee->isDeclaration()) | |||
1564 | Entry.second->addCalledFunction(nullptr, (*CG)[Callee]); | |||
1565 | } | |||
1566 | } | |||
1567 | } | |||
1568 | } | |||
1569 | } | |||
1570 | } | |||
1571 | } | |||
1572 | ||||
1573 | // Compute a top-down order the profile which is used to sort functions in | |||
1574 | // one SCC later. The static processing order computed for an SCC may not | |||
1575 | // reflect the call contexts in the context-sensitive profile, thus may cause | |||
1576 | // potential inlining to be overlooked. The function order in one SCC is being | |||
1577 | // adjusted to a top-down order based on the profile to favor more inlining. | |||
1578 | DenseMap<Function *, uint64_t> ProfileOrderMap; | |||
1579 | if (UseProfileTopDownOrder || | |||
1580 | (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) { | |||
1581 | // Create a static call graph. The call edges are not important since they | |||
1582 | // will be replaced by dynamic edges from the profile. | |||
1583 | CallGraph ProfileCG(M); | |||
1584 | replaceCallGraphEdges(ProfileCG, SymbolMap); | |||
1585 | scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG); | |||
1586 | uint64_t I = 0; | |||
1587 | while (!CGI.isAtEnd()) { | |||
1588 | for (CallGraphNode *Node : *CGI) { | |||
1589 | if (auto *F = Node->getFunction()) | |||
1590 | ProfileOrderMap[F] = ++I; | |||
1591 | } | |||
1592 | ++CGI; | |||
1593 | } | |||
1594 | } | |||
1595 | ||||
1596 | scc_iterator<CallGraph *> CGI = scc_begin(CG); | |||
1597 | while (!CGI.isAtEnd()) { | |||
1598 | uint64_t Start = FunctionOrderList.size(); | |||
1599 | for (CallGraphNode *Node : *CGI) { | |||
1600 | auto *F = Node->getFunction(); | |||
1601 | if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) | |||
1602 | FunctionOrderList.push_back(F); | |||
1603 | } | |||
1604 | ||||
1605 | // Sort nodes in SCC based on the profile top-down order. | |||
1606 | if (!ProfileOrderMap.empty()) { | |||
1607 | std::stable_sort(FunctionOrderList.begin() + Start, | |||
1608 | FunctionOrderList.end(), | |||
1609 | [&ProfileOrderMap](Function *Left, Function *Right) { | |||
1610 | return ProfileOrderMap[Left] < ProfileOrderMap[Right]; | |||
1611 | }); | |||
1612 | } | |||
1613 | ||||
1614 | ++CGI; | |||
1615 | } | |||
1616 | ||||
1617 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1618 | dbgs() << "Function processing order:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1619 | for (auto F : reverse(FunctionOrderList)) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1620 | dbgs() << F->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1621 | }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1622 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false); | |||
1623 | ||||
1624 | std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); | |||
1625 | return FunctionOrderList; | |||
1626 | } | |||
1627 | ||||
1628 | bool SampleProfileLoader::doInitialization(Module &M, | |||
1629 | FunctionAnalysisManager *FAM) { | |||
1630 | auto &Ctx = M.getContext(); | |||
1631 | ||||
1632 | auto ReaderOrErr = | |||
1633 | SampleProfileReader::create(Filename, Ctx, RemappingFilename); | |||
1634 | if (std::error_code EC = ReaderOrErr.getError()) { | |||
1635 | std::string Msg = "Could not open profile: " + EC.message(); | |||
1636 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1637 | return false; | |||
1638 | } | |||
1639 | Reader = std::move(ReaderOrErr.get()); | |||
1640 | Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink); | |||
1641 | Reader->collectFuncsFrom(M); | |||
1642 | if (std::error_code EC = Reader->read()) { | |||
1643 | std::string Msg = "profile reading failed: " + EC.message(); | |||
1644 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1645 | return false; | |||
1646 | } | |||
1647 | ||||
1648 | PSL = Reader->getProfileSymbolList(); | |||
1649 | ||||
1650 | // While profile-sample-accurate is on, ignore symbol list. | |||
1651 | ProfAccForSymsInList = | |||
1652 | ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; | |||
1653 | if (ProfAccForSymsInList) { | |||
1654 | NamesInProfile.clear(); | |||
1655 | if (auto NameTable = Reader->getNameTable()) | |||
1656 | NamesInProfile.insert(NameTable->begin(), NameTable->end()); | |||
1657 | CoverageTracker.setProfAccForSymsInList(true); | |||
1658 | } | |||
1659 | ||||
1660 | if (FAM && !ProfileInlineReplayFile.empty()) { | |||
1661 | ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>( | |||
1662 | M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile, | |||
1663 | /*EmitRemarks=*/false); | |||
1664 | if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) | |||
1665 | ExternalInlineAdvisor.reset(); | |||
1666 | } | |||
1667 | ||||
1668 | // Apply tweaks if context-sensitive profile is available. | |||
1669 | if (Reader->profileIsCS()) { | |||
1670 | ProfileIsCS = true; | |||
1671 | FunctionSamples::ProfileIsCS = true; | |||
1672 | ||||
1673 | // Enable priority-base inliner and size inline by default for CSSPGO. | |||
1674 | if (!ProfileSizeInline.getNumOccurrences()) | |||
1675 | ProfileSizeInline = true; | |||
1676 | if (!CallsitePrioritizedInline.getNumOccurrences()) | |||
1677 | CallsitePrioritizedInline = true; | |||
1678 | ||||
1679 | // Tracker for profiles under different context | |||
1680 | ContextTracker = | |||
1681 | std::make_unique<SampleContextTracker>(Reader->getProfiles()); | |||
1682 | } | |||
1683 | ||||
1684 | // Load pseudo probe descriptors for probe-based function samples. | |||
1685 | if (Reader->profileIsProbeBased()) { | |||
1686 | ProbeManager = std::make_unique<PseudoProbeManager>(M); | |||
1687 | if (!ProbeManager->moduleIsProbed(M)) { | |||
1688 | const char *Msg = | |||
1689 | "Pseudo-probe-based profile requires SampleProfileProbePass"; | |||
1690 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1691 | return false; | |||
1692 | } | |||
1693 | } | |||
1694 | ||||
1695 | return true; | |||
1696 | } | |||
1697 | ||||
1698 | ModulePass *llvm::createSampleProfileLoaderPass() { | |||
1699 | return new SampleProfileLoaderLegacyPass(); | |||
1700 | } | |||
1701 | ||||
1702 | ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { | |||
1703 | return new SampleProfileLoaderLegacyPass(Name); | |||
1704 | } | |||
1705 | ||||
1706 | bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, | |||
1707 | ProfileSummaryInfo *_PSI, CallGraph *CG) { | |||
1708 | GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); | |||
1709 | ||||
1710 | PSI = _PSI; | |||
1711 | if (M.getProfileSummary(/* IsCS */ false) == nullptr) { | |||
1712 | M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), | |||
1713 | ProfileSummary::PSK_Sample); | |||
1714 | PSI->refresh(); | |||
1715 | } | |||
1716 | // Compute the total number of samples collected in this profile. | |||
1717 | for (const auto &I : Reader->getProfiles()) | |||
1718 | TotalCollectedSamples += I.second.getTotalSamples(); | |||
1719 | ||||
1720 | auto Remapper = Reader->getRemapper(); | |||
1721 | // Populate the symbol map. | |||
1722 | for (const auto &N_F : M.getValueSymbolTable()) { | |||
1723 | StringRef OrigName = N_F.getKey(); | |||
1724 | Function *F = dyn_cast<Function>(N_F.getValue()); | |||
1725 | if (F == nullptr) | |||
1726 | continue; | |||
1727 | SymbolMap[OrigName] = F; | |||
1728 | auto pos = OrigName.find('.'); | |||
1729 | if (pos != StringRef::npos) { | |||
1730 | StringRef NewName = OrigName.substr(0, pos); | |||
1731 | auto r = SymbolMap.insert(std::make_pair(NewName, F)); | |||
1732 | // Failiing to insert means there is already an entry in SymbolMap, | |||
1733 | // thus there are multiple functions that are mapped to the same | |||
1734 | // stripped name. In this case of name conflicting, set the value | |||
1735 | // to nullptr to avoid confusion. | |||
1736 | if (!r.second) | |||
1737 | r.first->second = nullptr; | |||
1738 | OrigName = NewName; | |||
1739 | } | |||
1740 | // Insert the remapped names into SymbolMap. | |||
1741 | if (Remapper) { | |||
1742 | if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) { | |||
1743 | if (*MapName == OrigName) | |||
1744 | continue; | |||
1745 | SymbolMap.insert(std::make_pair(*MapName, F)); | |||
1746 | } | |||
1747 | } | |||
1748 | } | |||
1749 | ||||
1750 | bool retval = false; | |||
1751 | for (auto F : buildFunctionOrder(M, CG)) { | |||
1752 | assert(!F->isDeclaration())((!F->isDeclaration()) ? static_cast<void> (0) : __assert_fail ("!F->isDeclaration()", "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1752, __PRETTY_FUNCTION__)); | |||
1753 | clearFunctionData(); | |||
1754 | retval |= runOnFunction(*F, AM); | |||
1755 | } | |||
1756 | ||||
1757 | // Account for cold calls not inlined.... | |||
1758 | if (!ProfileIsCS) | |||
1759 | for (const std::pair<Function *, NotInlinedProfileInfo> &pair : | |||
1760 | notInlinedCallInfo) | |||
1761 | updateProfileCallee(pair.first, pair.second.entryCount); | |||
1762 | ||||
1763 | return retval; | |||
1764 | } | |||
1765 | ||||
1766 | bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { | |||
1767 | ACT = &getAnalysis<AssumptionCacheTracker>(); | |||
1768 | TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); | |||
1769 | TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); | |||
1770 | ProfileSummaryInfo *PSI = | |||
1771 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); | |||
1772 | return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); | |||
1773 | } | |||
1774 | ||||
1775 | bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { | |||
1776 | LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\n\nProcessing Function " << F.getName() << "\n"; } } while (false); | |||
1777 | DILocation2SampleMap.clear(); | |||
1778 | // By default the entry count is initialized to -1, which will be treated | |||
1779 | // conservatively by getEntryCount as the same as unknown (None). This is | |||
1780 | // to avoid newly added code to be treated as cold. If we have samples | |||
1781 | // this will be overwritten in emitAnnotations. | |||
1782 | uint64_t initialEntryCount = -1; | |||
1783 | ||||
1784 | ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; | |||
1785 | if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { | |||
1786 | // initialize all the function entry counts to 0. It means all the | |||
1787 | // functions without profile will be regarded as cold. | |||
1788 | initialEntryCount = 0; | |||
1789 | // profile-sample-accurate is a user assertion which has a higher precedence | |||
1790 | // than symbol list. When profile-sample-accurate is on, ignore symbol list. | |||
1791 | ProfAccForSymsInList = false; | |||
1792 | } | |||
1793 | CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList); | |||
1794 | ||||
1795 | // PSL -- profile symbol list include all the symbols in sampled binary. | |||
1796 | // If ProfileAccurateForSymsInList is enabled, PSL is used to treat | |||
1797 | // old functions without samples being cold, without having to worry | |||
1798 | // about new and hot functions being mistakenly treated as cold. | |||
1799 | if (ProfAccForSymsInList) { | |||
1800 | // Initialize the entry count to 0 for functions in the list. | |||
1801 | if (PSL->contains(F.getName())) | |||
1802 | initialEntryCount = 0; | |||
1803 | ||||
1804 | // Function in the symbol list but without sample will be regarded as | |||
1805 | // cold. To minimize the potential negative performance impact it could | |||
1806 | // have, we want to be a little conservative here saying if a function | |||
1807 | // shows up in the profile, no matter as outline function, inline instance | |||
1808 | // or call targets, treat the function as not being cold. This will handle | |||
1809 | // the cases such as most callsites of a function are inlined in sampled | |||
1810 | // binary but not inlined in current build (because of source code drift, | |||
1811 | // imprecise debug information, or the callsites are all cold individually | |||
1812 | // but not cold accumulatively...), so the outline function showing up as | |||
1813 | // cold in sampled binary will actually not be cold after current build. | |||
1814 | StringRef CanonName = FunctionSamples::getCanonicalFnName(F); | |||
1815 | if (NamesInProfile.count(CanonName)) | |||
1816 | initialEntryCount = -1; | |||
1817 | } | |||
1818 | ||||
1819 | // Initialize entry count when the function has no existing entry | |||
1820 | // count value. | |||
1821 | if (!F.getEntryCount().hasValue()) | |||
1822 | F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); | |||
1823 | std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; | |||
1824 | if (AM) { | |||
1825 | auto &FAM = | |||
1826 | AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent()) | |||
1827 | .getManager(); | |||
1828 | ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); | |||
1829 | } else { | |||
1830 | OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F); | |||
1831 | ORE = OwnedORE.get(); | |||
1832 | } | |||
1833 | ||||
1834 | if (ProfileIsCS) | |||
1835 | Samples = ContextTracker->getBaseSamplesFor(F); | |||
1836 | else | |||
1837 | Samples = Reader->getSamplesFor(F); | |||
1838 | ||||
1839 | if (Samples && !Samples->empty()) | |||
1840 | return emitAnnotations(F); | |||
1841 | return false; | |||
1842 | } | |||
1843 | ||||
1844 | PreservedAnalyses SampleProfileLoaderPass::run(Module &M, | |||
1845 | ModuleAnalysisManager &AM) { | |||
1846 | FunctionAnalysisManager &FAM = | |||
1847 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
1848 | ||||
1849 | auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { | |||
1850 | return FAM.getResult<AssumptionAnalysis>(F); | |||
1851 | }; | |||
1852 | auto GetTTI = [&](Function &F) -> TargetTransformInfo & { | |||
1853 | return FAM.getResult<TargetIRAnalysis>(F); | |||
1854 | }; | |||
1855 | auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { | |||
1856 | return FAM.getResult<TargetLibraryAnalysis>(F); | |||
1857 | }; | |||
1858 | ||||
1859 | SampleProfileLoader SampleLoader( | |||
1860 | ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, | |||
1861 | ProfileRemappingFileName.empty() ? SampleProfileRemappingFile | |||
1862 | : ProfileRemappingFileName, | |||
1863 | LTOPhase, GetAssumptionCache, GetTTI, GetTLI); | |||
1864 | ||||
1865 | if (!SampleLoader.doInitialization(M, &FAM)) | |||
1866 | return PreservedAnalyses::all(); | |||
1867 | ||||
1868 | ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); | |||
1869 | CallGraph &CG = AM.getResult<CallGraphAnalysis>(M); | |||
1870 | if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) | |||
1871 | return PreservedAnalyses::all(); | |||
1872 | ||||
1873 | return PreservedAnalyses::none(); | |||
1874 | } |