File: | llvm/lib/Transforms/IPO/SampleProfile.cpp |
Warning: | line 1335, column 7 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements the SampleProfileLoader transformation. This pass | |||
10 | // reads a profile file generated by a sampling profiler (e.g. Linux Perf - | |||
11 | // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the | |||
12 | // profile information in the given profile. | |||
13 | // | |||
14 | // This pass generates branch weight annotations on the IR: | |||
15 | // | |||
16 | // - prof: Represents branch weights. This annotation is added to branches | |||
17 | // to indicate the weights of each edge coming out of the branch. | |||
18 | // The weight of each edge is the weight of the target block for | |||
19 | // that edge. The weight of a block B is computed as the maximum | |||
20 | // number of samples found in B. | |||
21 | // | |||
22 | //===----------------------------------------------------------------------===// | |||
23 | ||||
24 | #include "llvm/Transforms/IPO/SampleProfile.h" | |||
25 | #include "llvm/ADT/ArrayRef.h" | |||
26 | #include "llvm/ADT/DenseMap.h" | |||
27 | #include "llvm/ADT/DenseSet.h" | |||
28 | #include "llvm/ADT/None.h" | |||
29 | #include "llvm/ADT/PriorityQueue.h" | |||
30 | #include "llvm/ADT/SCCIterator.h" | |||
31 | #include "llvm/ADT/SmallPtrSet.h" | |||
32 | #include "llvm/ADT/SmallSet.h" | |||
33 | #include "llvm/ADT/SmallVector.h" | |||
34 | #include "llvm/ADT/Statistic.h" | |||
35 | #include "llvm/ADT/StringMap.h" | |||
36 | #include "llvm/ADT/StringRef.h" | |||
37 | #include "llvm/ADT/Twine.h" | |||
38 | #include "llvm/Analysis/AssumptionCache.h" | |||
39 | #include "llvm/Analysis/CallGraph.h" | |||
40 | #include "llvm/Analysis/CallGraphSCCPass.h" | |||
41 | #include "llvm/Analysis/InlineAdvisor.h" | |||
42 | #include "llvm/Analysis/InlineCost.h" | |||
43 | #include "llvm/Analysis/LoopInfo.h" | |||
44 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
45 | #include "llvm/Analysis/PostDominators.h" | |||
46 | #include "llvm/Analysis/ProfileSummaryInfo.h" | |||
47 | #include "llvm/Analysis/ReplayInlineAdvisor.h" | |||
48 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
49 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
50 | #include "llvm/IR/BasicBlock.h" | |||
51 | #include "llvm/IR/CFG.h" | |||
52 | #include "llvm/IR/DebugInfoMetadata.h" | |||
53 | #include "llvm/IR/DebugLoc.h" | |||
54 | #include "llvm/IR/DiagnosticInfo.h" | |||
55 | #include "llvm/IR/Dominators.h" | |||
56 | #include "llvm/IR/Function.h" | |||
57 | #include "llvm/IR/GlobalValue.h" | |||
58 | #include "llvm/IR/InstrTypes.h" | |||
59 | #include "llvm/IR/Instruction.h" | |||
60 | #include "llvm/IR/Instructions.h" | |||
61 | #include "llvm/IR/IntrinsicInst.h" | |||
62 | #include "llvm/IR/LLVMContext.h" | |||
63 | #include "llvm/IR/MDBuilder.h" | |||
64 | #include "llvm/IR/Module.h" | |||
65 | #include "llvm/IR/PassManager.h" | |||
66 | #include "llvm/IR/ValueSymbolTable.h" | |||
67 | #include "llvm/InitializePasses.h" | |||
68 | #include "llvm/Pass.h" | |||
69 | #include "llvm/ProfileData/InstrProf.h" | |||
70 | #include "llvm/ProfileData/SampleProf.h" | |||
71 | #include "llvm/ProfileData/SampleProfReader.h" | |||
72 | #include "llvm/Support/Casting.h" | |||
73 | #include "llvm/Support/CommandLine.h" | |||
74 | #include "llvm/Support/Debug.h" | |||
75 | #include "llvm/Support/ErrorHandling.h" | |||
76 | #include "llvm/Support/ErrorOr.h" | |||
77 | #include "llvm/Support/GenericDomTree.h" | |||
78 | #include "llvm/Support/raw_ostream.h" | |||
79 | #include "llvm/Transforms/IPO.h" | |||
80 | #include "llvm/Transforms/IPO/ProfiledCallGraph.h" | |||
81 | #include "llvm/Transforms/IPO/SampleContextTracker.h" | |||
82 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" | |||
83 | #include "llvm/Transforms/Instrumentation.h" | |||
84 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" | |||
85 | #include "llvm/Transforms/Utils/Cloning.h" | |||
86 | #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" | |||
87 | #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" | |||
88 | #include <algorithm> | |||
89 | #include <cassert> | |||
90 | #include <cstdint> | |||
91 | #include <functional> | |||
92 | #include <limits> | |||
93 | #include <map> | |||
94 | #include <memory> | |||
95 | #include <queue> | |||
96 | #include <string> | |||
97 | #include <system_error> | |||
98 | #include <utility> | |||
99 | #include <vector> | |||
100 | ||||
101 | using namespace llvm; | |||
102 | using namespace sampleprof; | |||
103 | using namespace llvm::sampleprofutil; | |||
104 | using ProfileCount = Function::ProfileCount; | |||
// Category string used by this file for debug output, statistics and
// optimization remarks. The literal must be separated from the macro name by
// whitespace so that the macro expands to exactly one string.
#define DEBUG_TYPE "sample-profile"
// Category string for the context-sensitive inliner: DEBUG_TYPE with an
// "-inline" suffix, formed by adjacent string-literal concatenation.
#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
107 | ||||
108 | STATISTIC(NumCSInlined,static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined" , "Number of functions inlined with context sensitive profile" } | |||
109 | "Number of functions inlined with context sensitive profile")static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined" , "Number of functions inlined with context sensitive profile" }; | |||
110 | STATISTIC(NumCSNotInlined,static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined" , "Number of functions not inlined with context sensitive profile" } | |||
111 | "Number of functions not inlined with context sensitive profile")static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined" , "Number of functions not inlined with context sensitive profile" }; | |||
112 | STATISTIC(NumMismatchedProfile,static llvm::Statistic NumMismatchedProfile = {"sample-profile" , "NumMismatchedProfile", "Number of functions with CFG mismatched profile" } | |||
113 | "Number of functions with CFG mismatched profile")static llvm::Statistic NumMismatchedProfile = {"sample-profile" , "NumMismatchedProfile", "Number of functions with CFG mismatched profile" }; | |||
114 | STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile")static llvm::Statistic NumMatchedProfile = {"sample-profile", "NumMatchedProfile", "Number of functions with CFG matched profile" }; | |||
115 | STATISTIC(NumDuplicatedInlinesite,static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile" , "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor" } | |||
116 | "Number of inlined callsites with a partial distribution factor")static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile" , "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor" }; | |||
117 | ||||
118 | STATISTIC(NumCSInlinedHitMinLimit,static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile" , "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit" } | |||
119 | "Number of functions with FDO inline stopped due to min size limit")static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile" , "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit" }; | |||
120 | STATISTIC(NumCSInlinedHitMaxLimit,static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile" , "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit" } | |||
121 | "Number of functions with FDO inline stopped due to max size limit")static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile" , "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit" }; | |||
122 | STATISTIC(static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" } | |||
123 | NumCSInlinedHitGrowthLimit,static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" } | |||
124 | "Number of functions with FDO inline stopped due to growth size limit")static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile" , "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit" }; | |||
125 | ||||
126 | // Command line option to specify the file to read samples from. This is | |||
127 | // mainly used for debugging. | |||
128 | static cl::opt<std::string> SampleProfileFile( | |||
129 | "sample-profile-file", cl::init(""), cl::value_desc("filename"), | |||
130 | cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); | |||
131 | ||||
132 | // The named file contains a set of transformations that may have been applied | |||
133 | // to the symbol names between the program from which the sample data was | |||
134 | // collected and the current program's symbols. | |||
135 | static cl::opt<std::string> SampleProfileRemappingFile( | |||
136 | "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), | |||
137 | cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden); | |||
138 | ||||
139 | static cl::opt<bool> ProfileSampleAccurate( | |||
140 | "profile-sample-accurate", cl::Hidden, cl::init(false), | |||
141 | cl::desc("If the sample profile is accurate, we will mark all un-sampled " | |||
142 | "callsite and function as having 0 samples. Otherwise, treat " | |||
143 | "un-sampled callsites and functions conservatively as unknown. ")); | |||
144 | ||||
145 | static cl::opt<bool> ProfileAccurateForSymsInList( | |||
146 | "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, | |||
147 | cl::init(true), | |||
148 | cl::desc("For symbols in profile symbol list, regard their profiles to " | |||
149 | "be accurate. It may be overriden by profile-sample-accurate. ")); | |||
150 | ||||
151 | static cl::opt<bool> ProfileMergeInlinee( | |||
152 | "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), | |||
153 | cl::desc("Merge past inlinee's profile to outline version if sample " | |||
154 | "profile loader decided not to inline a call site. It will " | |||
155 | "only be enabled when top-down order of profile loading is " | |||
156 | "enabled. ")); | |||
157 | ||||
158 | static cl::opt<bool> ProfileTopDownLoad( | |||
159 | "sample-profile-top-down-load", cl::Hidden, cl::init(true), | |||
160 | cl::desc("Do profile annotation and inlining for functions in top-down " | |||
161 | "order of call graph during sample profile loading. It only " | |||
162 | "works for new pass manager. ")); | |||
163 | ||||
164 | static cl::opt<bool> | |||
165 | UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, | |||
166 | cl::desc("Process functions in a top-down order " | |||
167 | "defined by the profiled call graph when " | |||
168 | "-sample-profile-top-down-load is on.")); | |||
169 | ||||
170 | static cl::opt<bool> ProfileSizeInline( | |||
171 | "sample-profile-inline-size", cl::Hidden, cl::init(false), | |||
172 | cl::desc("Inline cold call sites in profile loader if it's beneficial " | |||
173 | "for code size.")); | |||
174 | ||||
175 | cl::opt<int> ProfileInlineGrowthLimit( | |||
176 | "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), | |||
177 | cl::desc("The size growth ratio limit for proirity-based sample profile " | |||
178 | "loader inlining.")); | |||
179 | ||||
180 | cl::opt<int> ProfileInlineLimitMin( | |||
181 | "sample-profile-inline-limit-min", cl::Hidden, cl::init(100), | |||
182 | cl::desc("The lower bound of size growth limit for " | |||
183 | "proirity-based sample profile loader inlining.")); | |||
184 | ||||
185 | cl::opt<int> ProfileInlineLimitMax( | |||
186 | "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), | |||
187 | cl::desc("The upper bound of size growth limit for " | |||
188 | "proirity-based sample profile loader inlining.")); | |||
189 | ||||
190 | cl::opt<int> SampleHotCallSiteThreshold( | |||
191 | "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), | |||
192 | cl::desc("Hot callsite threshold for proirity-based sample profile loader " | |||
193 | "inlining.")); | |||
194 | ||||
195 | cl::opt<int> SampleColdCallSiteThreshold( | |||
196 | "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), | |||
197 | cl::desc("Threshold for inlining cold callsites")); | |||
198 | ||||
199 | static cl::opt<int> ProfileICPThreshold( | |||
200 | "sample-profile-icp-threshold", cl::Hidden, cl::init(5), | |||
201 | cl::desc( | |||
202 | "Relative hotness threshold for indirect " | |||
203 | "call promotion in proirity-based sample profile loader inlining.")); | |||
204 | ||||
205 | static cl::opt<bool> CallsitePrioritizedInline( | |||
206 | "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, | |||
207 | cl::init(false), | |||
208 | cl::desc("Use call site prioritized inlining for sample profile loader." | |||
209 | "Currently only CSSPGO is supported.")); | |||
210 | ||||
211 | ||||
212 | static cl::opt<std::string> ProfileInlineReplayFile( | |||
213 | "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), | |||
214 | cl::desc( | |||
215 | "Optimization remarks file containing inline remarks to be replayed " | |||
216 | "by inlining from sample profile loader."), | |||
217 | cl::Hidden); | |||
218 | ||||
219 | static cl::opt<unsigned> | |||
220 | MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, | |||
221 | cl::ZeroOrMore, | |||
222 | cl::desc("Max number of promotions for a single indirect " | |||
223 | "call callsite in sample profile loader")); | |||
224 | ||||
225 | namespace { | |||
226 | ||||
227 | using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; | |||
228 | using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>; | |||
229 | using Edge = std::pair<const BasicBlock *, const BasicBlock *>; | |||
230 | using EdgeWeightMap = DenseMap<Edge, uint64_t>; | |||
231 | using BlockEdgeMap = | |||
232 | DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; | |||
233 | ||||
234 | class GUIDToFuncNameMapper { | |||
235 | public: | |||
236 | GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, | |||
237 | DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) | |||
238 | : CurrentReader(Reader), CurrentModule(M), | |||
239 | CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { | |||
240 | if (!CurrentReader.useMD5()) | |||
241 | return; | |||
242 | ||||
243 | for (const auto &F : CurrentModule) { | |||
244 | StringRef OrigName = F.getName(); | |||
245 | CurrentGUIDToFuncNameMap.insert( | |||
246 | {Function::getGUID(OrigName), OrigName}); | |||
247 | ||||
248 | // Local to global var promotion used by optimization like thinlto | |||
249 | // will rename the var and add suffix like ".llvm.xxx" to the | |||
250 | // original local name. In sample profile, the suffixes of function | |||
251 | // names are all stripped. Since it is possible that the mapper is | |||
252 | // built in post-thin-link phase and var promotion has been done, | |||
253 | // we need to add the substring of function name without the suffix | |||
254 | // into the GUIDToFuncNameMap. | |||
255 | StringRef CanonName = FunctionSamples::getCanonicalFnName(F); | |||
256 | if (CanonName != OrigName) | |||
257 | CurrentGUIDToFuncNameMap.insert( | |||
258 | {Function::getGUID(CanonName), CanonName}); | |||
259 | } | |||
260 | ||||
261 | // Update GUIDToFuncNameMap for each function including inlinees. | |||
262 | SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); | |||
263 | } | |||
264 | ||||
265 | ~GUIDToFuncNameMapper() { | |||
266 | if (!CurrentReader.useMD5()) | |||
267 | return; | |||
268 | ||||
269 | CurrentGUIDToFuncNameMap.clear(); | |||
270 | ||||
271 | // Reset GUIDToFuncNameMap for of each function as they're no | |||
272 | // longer valid at this point. | |||
273 | SetGUIDToFuncNameMapForAll(nullptr); | |||
274 | } | |||
275 | ||||
276 | private: | |||
277 | void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) { | |||
278 | std::queue<FunctionSamples *> FSToUpdate; | |||
279 | for (auto &IFS : CurrentReader.getProfiles()) { | |||
280 | FSToUpdate.push(&IFS.second); | |||
281 | } | |||
282 | ||||
283 | while (!FSToUpdate.empty()) { | |||
284 | FunctionSamples *FS = FSToUpdate.front(); | |||
285 | FSToUpdate.pop(); | |||
286 | FS->GUIDToFuncNameMap = Map; | |||
287 | for (const auto &ICS : FS->getCallsiteSamples()) { | |||
288 | const FunctionSamplesMap &FSMap = ICS.second; | |||
289 | for (auto &IFS : FSMap) { | |||
290 | FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second); | |||
291 | FSToUpdate.push(&FS); | |||
292 | } | |||
293 | } | |||
294 | } | |||
295 | } | |||
296 | ||||
297 | SampleProfileReader &CurrentReader; | |||
298 | Module &CurrentModule; | |||
299 | DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap; | |||
300 | }; | |||
301 | ||||
302 | // Inline candidate used by iterative callsite prioritized inliner | |||
303 | struct InlineCandidate { | |||
304 | CallBase *CallInstr; | |||
305 | const FunctionSamples *CalleeSamples; | |||
306 | // Prorated callsite count, which will be used to guide inlining. For example, | |||
307 | // if a callsite is duplicated in LTO prelink, then in LTO postlink the two | |||
308 | // copies will get their own distribution factors and their prorated counts | |||
309 | // will be used to decide if they should be inlined independently. | |||
310 | uint64_t CallsiteCount; | |||
311 | // Call site distribution factor to prorate the profile samples for a | |||
312 | // duplicated callsite. Default value is 1.0. | |||
313 | float CallsiteDistribution; | |||
314 | }; | |||
315 | ||||
316 | // Inline candidate comparer using call site weight | |||
317 | struct CandidateComparer { | |||
318 | bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) { | |||
319 | if (LHS.CallsiteCount != RHS.CallsiteCount) | |||
320 | return LHS.CallsiteCount < RHS.CallsiteCount; | |||
321 | ||||
322 | const FunctionSamples *LCS = LHS.CalleeSamples; | |||
323 | const FunctionSamples *RCS = RHS.CalleeSamples; | |||
324 | assert(LCS && RCS && "Expect non-null FunctionSamples")(static_cast <bool> (LCS && RCS && "Expect non-null FunctionSamples" ) ? void (0) : __assert_fail ("LCS && RCS && \"Expect non-null FunctionSamples\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 324, __extension__ __PRETTY_FUNCTION__)); | |||
325 | ||||
326 | // Tie breaker using number of samples try to favor smaller functions first | |||
327 | if (LCS->getBodySamples().size() != RCS->getBodySamples().size()) | |||
328 | return LCS->getBodySamples().size() > RCS->getBodySamples().size(); | |||
329 | ||||
330 | // Tie breaker using GUID so we have stable/deterministic inlining order | |||
331 | return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName()); | |||
332 | } | |||
333 | }; | |||
334 | ||||
335 | using CandidateQueue = | |||
336 | PriorityQueue<InlineCandidate, std::vector<InlineCandidate>, | |||
337 | CandidateComparer>; | |||
338 | ||||
339 | /// Sample profile pass. | |||
340 | /// | |||
341 | /// This pass reads profile data from the file specified by | |||
342 | /// -sample-profile-file and annotates every affected function with the | |||
343 | /// profile information found in that file. | |||
344 | class SampleProfileLoader final | |||
345 | : public SampleProfileLoaderBaseImpl<BasicBlock> { | |||
346 | public: | |||
347 | SampleProfileLoader( | |||
348 | StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase, | |||
349 | std::function<AssumptionCache &(Function &)> GetAssumptionCache, | |||
350 | std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo, | |||
351 | std::function<const TargetLibraryInfo &(Function &)> GetTLI) | |||
352 | : SampleProfileLoaderBaseImpl(std::string(Name)), | |||
353 | GetAC(std::move(GetAssumptionCache)), | |||
354 | GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), | |||
355 | RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {} | |||
356 | ||||
357 | bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); | |||
358 | bool runOnModule(Module &M, ModuleAnalysisManager *AM, | |||
359 | ProfileSummaryInfo *_PSI, CallGraph *CG); | |||
360 | ||||
361 | protected: | |||
362 | bool runOnFunction(Function &F, ModuleAnalysisManager *AM); | |||
363 | bool emitAnnotations(Function &F); | |||
364 | ErrorOr<uint64_t> getInstWeight(const Instruction &I) override; | |||
365 | ErrorOr<uint64_t> getProbeWeight(const Instruction &I); | |||
366 | const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const; | |||
367 | const FunctionSamples * | |||
368 | findFunctionSamples(const Instruction &I) const override; | |||
369 | std::vector<const FunctionSamples *> | |||
370 | findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; | |||
371 | void findExternalInlineCandidate(const FunctionSamples *Samples, | |||
372 | DenseSet<GlobalValue::GUID> &InlinedGUIDs, | |||
373 | const StringMap<Function *> &SymbolMap, | |||
374 | uint64_t Threshold); | |||
375 | // Attempt to promote indirect call and also inline the promoted call | |||
376 | bool tryPromoteAndInlineCandidate( | |||
377 | Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, | |||
378 | uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); | |||
379 | bool inlineHotFunctions(Function &F, | |||
380 | DenseSet<GlobalValue::GUID> &InlinedGUIDs); | |||
381 | InlineCost shouldInlineCandidate(InlineCandidate &Candidate); | |||
382 | bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); | |||
383 | bool | |||
384 | tryInlineCandidate(InlineCandidate &Candidate, | |||
385 | SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); | |||
386 | bool | |||
387 | inlineHotFunctionsWithPriority(Function &F, | |||
388 | DenseSet<GlobalValue::GUID> &InlinedGUIDs); | |||
389 | // Inline cold/small functions in addition to hot ones | |||
390 | bool shouldInlineColdCallee(CallBase &CallInst); | |||
391 | void emitOptimizationRemarksForInlineCandidates( | |||
392 | const SmallVectorImpl<CallBase *> &Candidates, const Function &F, | |||
393 | bool Hot); | |||
394 | std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); | |||
395 | std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG); | |||
396 | void generateMDProfMetadata(Function &F); | |||
397 | ||||
398 | /// Map from function name to Function *. Used to find the function from | |||
399 | /// the function name. If the function name contains suffix, additional | |||
400 | /// entry is added to map from the stripped name to the function if there | |||
401 | /// is one-to-one mapping. | |||
402 | StringMap<Function *> SymbolMap; | |||
403 | ||||
404 | std::function<AssumptionCache &(Function &)> GetAC; | |||
405 | std::function<TargetTransformInfo &(Function &)> GetTTI; | |||
406 | std::function<const TargetLibraryInfo &(Function &)> GetTLI; | |||
407 | ||||
408 | /// Profile tracker for different context. | |||
409 | std::unique_ptr<SampleContextTracker> ContextTracker; | |||
410 | ||||
411 | /// Name of the profile remapping file to load. | |||
412 | std::string RemappingFilename; | |||
413 | ||||
414 | /// Flag indicating whether the profile input loaded successfully. | |||
415 | bool ProfileIsValid = false; | |||
416 | ||||
417 | /// Flag indicating whether input profile is context-sensitive | |||
418 | bool ProfileIsCS = false; | |||
419 | ||||
420 | /// Flag indicating which LTO/ThinLTO phase the pass is invoked in. | |||
421 | /// | |||
422 | /// We need to know the LTO phase because for example in ThinLTOPrelink | |||
423 | /// phase, in annotation, we should not promote indirect calls. Instead, | |||
424 | /// we will mark GUIDs that needs to be annotated to the function. | |||
425 | ThinOrFullLTOPhase LTOPhase; | |||
426 | ||||
427 | /// Profle Symbol list tells whether a function name appears in the binary | |||
428 | /// used to generate the current profile. | |||
429 | std::unique_ptr<ProfileSymbolList> PSL; | |||
430 | ||||
431 | /// Total number of samples collected in this profile. | |||
432 | /// | |||
433 | /// This is the sum of all the samples collected in all the functions executed | |||
434 | /// at runtime. | |||
435 | uint64_t TotalCollectedSamples = 0; | |||
436 | ||||
437 | // Information recorded when we declined to inline a call site | |||
438 | // because we have determined it is too cold is accumulated for | |||
439 | // each callee function. Initially this is just the entry count. | |||
440 | struct NotInlinedProfileInfo { | |||
441 | uint64_t entryCount; | |||
442 | }; | |||
443 | DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; | |||
444 | ||||
445 | // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for | |||
446 | // all the function symbols defined or declared in current module. | |||
447 | DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; | |||
448 | ||||
449 | // All the Names used in FunctionSamples including outline function | |||
450 | // names, inline instance names and call target names. | |||
451 | StringSet<> NamesInProfile; | |||
452 | ||||
453 | // For symbol in profile symbol list, whether to regard their profiles | |||
454 | // to be accurate. It is mainly decided by existance of profile symbol | |||
455 | // list and -profile-accurate-for-symsinlist flag, but it can be | |||
456 | // overriden by -profile-sample-accurate or profile-sample-accurate | |||
457 | // attribute. | |||
458 | bool ProfAccForSymsInList; | |||
459 | ||||
460 | // External inline advisor used to replay inline decision from remarks. | |||
461 | std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor; | |||
462 | ||||
463 | // A pseudo probe helper to correlate the imported sample counts. | |||
464 | std::unique_ptr<PseudoProbeManager> ProbeManager; | |||
465 | }; | |||
466 | ||||
467 | class SampleProfileLoaderLegacyPass : public ModulePass { | |||
468 | public: | |||
469 | // Class identification, replacement for typeinfo | |||
470 | static char ID; | |||
471 | ||||
472 | SampleProfileLoaderLegacyPass( | |||
473 | StringRef Name = SampleProfileFile, | |||
474 | ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) | |||
475 | : ModulePass(ID), SampleLoader( | |||
476 | Name, SampleProfileRemappingFile, LTOPhase, | |||
477 | [&](Function &F) -> AssumptionCache & { | |||
478 | return ACT->getAssumptionCache(F); | |||
479 | }, | |||
480 | [&](Function &F) -> TargetTransformInfo & { | |||
481 | return TTIWP->getTTI(F); | |||
482 | }, | |||
483 | [&](Function &F) -> TargetLibraryInfo & { | |||
484 | return TLIWP->getTLI(F); | |||
485 | }) { | |||
486 | initializeSampleProfileLoaderLegacyPassPass( | |||
487 | *PassRegistry::getPassRegistry()); | |||
488 | } | |||
489 | ||||
490 | void dump() { SampleLoader.dump(); } | |||
491 | ||||
492 | bool doInitialization(Module &M) override { | |||
493 | return SampleLoader.doInitialization(M); | |||
494 | } | |||
495 | ||||
496 | StringRef getPassName() const override { return "Sample profile pass"; } | |||
497 | bool runOnModule(Module &M) override; | |||
498 | ||||
499 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
500 | AU.addRequired<AssumptionCacheTracker>(); | |||
501 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
502 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
503 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); | |||
504 | } | |||
505 | ||||
506 | private: | |||
507 | SampleProfileLoader SampleLoader; | |||
508 | AssumptionCacheTracker *ACT = nullptr; | |||
509 | TargetTransformInfoWrapperPass *TTIWP = nullptr; | |||
510 | TargetLibraryInfoWrapperPass *TLIWP = nullptr; | |||
511 | }; | |||
512 | ||||
513 | } // end anonymous namespace | |||
514 | ||||
515 | ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { | |||
516 | if (FunctionSamples::ProfileIsProbeBased) | |||
517 | return getProbeWeight(Inst); | |||
518 | ||||
519 | const DebugLoc &DLoc = Inst.getDebugLoc(); | |||
520 | if (!DLoc) | |||
521 | return std::error_code(); | |||
522 | ||||
523 | // Ignore all intrinsics, phinodes and branch instructions. | |||
524 | // Branch and phinodes instruction usually contains debug info from sources | |||
525 | // outside of the residing basic block, thus we ignore them during annotation. | |||
526 | if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst)) | |||
527 | return std::error_code(); | |||
528 | ||||
529 | // For non-CS profile, if a direct call/invoke instruction is inlined in | |||
530 | // profile (findCalleeFunctionSamples returns non-empty result), but not | |||
531 | // inlined here, it means that the inlined callsite has no sample, thus the | |||
532 | // call instruction should have 0 count. | |||
533 | // For CS profile, the callsite count of previously inlined callees is | |||
534 | // populated with the entry count of the callees. | |||
535 | if (!ProfileIsCS) | |||
536 | if (const auto *CB = dyn_cast<CallBase>(&Inst)) | |||
537 | if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) | |||
538 | return 0; | |||
539 | ||||
540 | return getInstWeightImpl(Inst); | |||
541 | } | |||
542 | ||||
543 | ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { | |||
544 | assert(FunctionSamples::ProfileIsProbeBased &&(static_cast <bool> (FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based") ? void (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 545, __extension__ __PRETTY_FUNCTION__)) | |||
545 | "Profile is not pseudo probe based")(static_cast <bool> (FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based") ? void (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 545, __extension__ __PRETTY_FUNCTION__)); | |||
546 | Optional<PseudoProbe> Probe = extractProbe(Inst); | |||
547 | if (!Probe) | |||
548 | return std::error_code(); | |||
549 | ||||
550 | // Ignore danling probes since they are logically deleted and should not | |||
551 | // consume any profile samples. | |||
552 | if (Probe->isDangling()) | |||
553 | return std::error_code(); | |||
554 | ||||
555 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
556 | if (!FS) | |||
557 | return std::error_code(); | |||
558 | ||||
559 | // For non-CS profile, If a direct call/invoke instruction is inlined in | |||
560 | // profile (findCalleeFunctionSamples returns non-empty result), but not | |||
561 | // inlined here, it means that the inlined callsite has no sample, thus the | |||
562 | // call instruction should have 0 count. | |||
563 | // For CS profile, the callsite count of previously inlined callees is | |||
564 | // populated with the entry count of the callees. | |||
565 | if (!ProfileIsCS) | |||
566 | if (const auto *CB = dyn_cast<CallBase>(&Inst)) | |||
567 | if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) | |||
568 | return 0; | |||
569 | ||||
570 | const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0); | |||
571 | if (R) { | |||
572 | uint64_t Samples = R.get() * Probe->Factor; | |||
573 | bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); | |||
574 | if (FirstMark) { | |||
575 | ORE->emit([&]() { | |||
576 | OptimizationRemarkAnalysis Remark(DEBUG_TYPE"sample-profile", "AppliedSamples", &Inst); | |||
577 | Remark << "Applied " << ore::NV("NumSamples", Samples); | |||
578 | Remark << " samples from profile (ProbeId="; | |||
579 | Remark << ore::NV("ProbeId", Probe->Id); | |||
580 | Remark << ", Factor="; | |||
581 | Remark << ore::NV("Factor", Probe->Factor); | |||
582 | Remark << ", OriginalSamples="; | |||
583 | Remark << ore::NV("OriginalSamples", R.get()); | |||
584 | Remark << ")"; | |||
585 | return Remark; | |||
586 | }); | |||
587 | } | |||
588 | LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Instdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false) | |||
589 | << " - weight: " << R.get() << " - factor: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false) | |||
590 | << format("%0.2f", Probe->Factor) << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " " << Probe-> Id << ":" << Inst << " - weight: " << R.get() << " - factor: " << format("%0.2f", Probe ->Factor) << ")\n"; } } while (false); | |||
591 | return Samples; | |||
592 | } | |||
593 | return R; | |||
594 | } | |||
595 | ||||
596 | /// Get the FunctionSamples for a call instruction. | |||
597 | /// | |||
598 | /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined | |||
599 | /// instance in which that call instruction is calling to. It contains | |||
600 | /// all samples that resides in the inlined instance. We first find the | |||
601 | /// inlined instance in which the call instruction is from, then we | |||
602 | /// traverse its children to find the callsite with the matching | |||
603 | /// location. | |||
604 | /// | |||
605 | /// \param Inst Call/Invoke instruction to query. | |||
606 | /// | |||
607 | /// \returns The FunctionSamples pointer to the inlined instance. | |||
608 | const FunctionSamples * | |||
609 | SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { | |||
610 | const DILocation *DIL = Inst.getDebugLoc(); | |||
611 | if (!DIL) { | |||
612 | return nullptr; | |||
613 | } | |||
614 | ||||
615 | StringRef CalleeName; | |||
616 | if (Function *Callee = Inst.getCalledFunction()) | |||
617 | CalleeName = Callee->getName(); | |||
618 | ||||
619 | if (ProfileIsCS) | |||
620 | return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); | |||
621 | ||||
622 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
623 | if (FS == nullptr) | |||
624 | return nullptr; | |||
625 | ||||
626 | return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL), | |||
627 | CalleeName, Reader->getRemapper()); | |||
628 | } | |||
629 | ||||
630 | /// Returns a vector of FunctionSamples that are the indirect call targets | |||
631 | /// of \p Inst. The vector is sorted by the total number of samples. Stores | |||
632 | /// the total call count of the indirect call in \p Sum. | |||
633 | std::vector<const FunctionSamples *> | |||
634 | SampleProfileLoader::findIndirectCallFunctionSamples( | |||
635 | const Instruction &Inst, uint64_t &Sum) const { | |||
636 | const DILocation *DIL = Inst.getDebugLoc(); | |||
637 | std::vector<const FunctionSamples *> R; | |||
638 | ||||
639 | if (!DIL) { | |||
640 | return R; | |||
641 | } | |||
642 | ||||
643 | auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { | |||
644 | assert(L && R && "Expect non-null FunctionSamples")(static_cast <bool> (L && R && "Expect non-null FunctionSamples" ) ? void (0) : __assert_fail ("L && R && \"Expect non-null FunctionSamples\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 644, __extension__ __PRETTY_FUNCTION__)); | |||
645 | if (L->getEntrySamples() != R->getEntrySamples()) | |||
646 | return L->getEntrySamples() > R->getEntrySamples(); | |||
647 | return FunctionSamples::getGUID(L->getName()) < | |||
648 | FunctionSamples::getGUID(R->getName()); | |||
649 | }; | |||
650 | ||||
651 | if (ProfileIsCS) { | |||
652 | auto CalleeSamples = | |||
653 | ContextTracker->getIndirectCalleeContextSamplesFor(DIL); | |||
654 | if (CalleeSamples.empty()) | |||
655 | return R; | |||
656 | ||||
657 | // For CSSPGO, we only use target context profile's entry count | |||
658 | // as that already includes both inlined callee and non-inlined ones.. | |||
659 | Sum = 0; | |||
660 | for (const auto *const FS : CalleeSamples) { | |||
661 | Sum += FS->getEntrySamples(); | |||
662 | R.push_back(FS); | |||
663 | } | |||
664 | llvm::sort(R, FSCompare); | |||
665 | return R; | |||
666 | } | |||
667 | ||||
668 | const FunctionSamples *FS = findFunctionSamples(Inst); | |||
669 | if (FS == nullptr) | |||
670 | return R; | |||
671 | ||||
672 | auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); | |||
673 | auto T = FS->findCallTargetMapAt(CallSite); | |||
674 | Sum = 0; | |||
675 | if (T) | |||
676 | for (const auto &T_C : T.get()) | |||
677 | Sum += T_C.second; | |||
678 | if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { | |||
679 | if (M->empty()) | |||
680 | return R; | |||
681 | for (const auto &NameFS : *M) { | |||
682 | Sum += NameFS.second.getEntrySamples(); | |||
683 | R.push_back(&NameFS.second); | |||
684 | } | |||
685 | llvm::sort(R, FSCompare); | |||
686 | } | |||
687 | return R; | |||
688 | } | |||
689 | ||||
690 | const FunctionSamples * | |||
691 | SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { | |||
692 | if (FunctionSamples::ProfileIsProbeBased) { | |||
693 | Optional<PseudoProbe> Probe = extractProbe(Inst); | |||
694 | if (!Probe) | |||
695 | return nullptr; | |||
696 | } | |||
697 | ||||
698 | const DILocation *DIL = Inst.getDebugLoc(); | |||
699 | if (!DIL) | |||
700 | return Samples; | |||
701 | ||||
702 | auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); | |||
703 | if (it.second) { | |||
704 | if (ProfileIsCS) | |||
705 | it.first->second = ContextTracker->getContextSamplesFor(DIL); | |||
706 | else | |||
707 | it.first->second = | |||
708 | Samples->findFunctionSamples(DIL, Reader->getRemapper()); | |||
709 | } | |||
710 | return it.first->second; | |||
711 | } | |||
712 | ||||
713 | /// Check whether the indirect call promotion history of \p Inst allows | |||
714 | /// the promotion for \p Candidate. | |||
715 | /// If the profile count for the promotion candidate \p Candidate is | |||
716 | /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted | |||
717 | /// for \p Inst. If we already have at least MaxNumPromotions | |||
718 | /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we | |||
719 | /// cannot promote for \p Inst anymore. | |||
720 | static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) { | |||
721 | uint32_t NumVals = 0; | |||
722 | uint64_t TotalCount = 0; | |||
723 | std::unique_ptr<InstrProfValueData[]> ValueData = | |||
724 | std::make_unique<InstrProfValueData[]>(MaxNumPromotions); | |||
725 | bool Valid = | |||
726 | getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, | |||
727 | ValueData.get(), NumVals, TotalCount, true); | |||
728 | // No valid value profile so no promoted targets have been recorded | |||
729 | // before. Ok to do ICP. | |||
730 | if (!Valid) | |||
731 | return true; | |||
732 | ||||
733 | unsigned NumPromoted = 0; | |||
734 | for (uint32_t I = 0; I < NumVals; I++) { | |||
735 | if (ValueData[I].Count != NOMORE_ICP_MAGICNUM) | |||
736 | continue; | |||
737 | ||||
738 | // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the | |||
739 | // metadata, it means the candidate has been promoted for this | |||
740 | // indirect call. | |||
741 | if (ValueData[I].Value == Function::getGUID(Candidate)) | |||
742 | return false; | |||
743 | NumPromoted++; | |||
744 | // If already have MaxNumPromotions promotion, don't do it anymore. | |||
745 | if (NumPromoted == MaxNumPromotions) | |||
746 | return false; | |||
747 | } | |||
748 | return true; | |||
749 | } | |||
750 | ||||
751 | /// Update indirect call target profile metadata for \p Inst. | |||
752 | /// Usually \p Sum is the sum of counts of all the targets for \p Inst. | |||
753 | /// If it is 0, it means updateIDTMetaData is used to mark a | |||
754 | /// certain target to be promoted already. If it is not zero, | |||
755 | /// we expect to use it to update the total count in the value profile. | |||
756 | static void | |||
757 | updateIDTMetaData(Instruction &Inst, | |||
758 | const SmallVectorImpl<InstrProfValueData> &CallTargets, | |||
759 | uint64_t Sum) { | |||
760 | uint32_t NumVals = 0; | |||
761 | // OldSum is the existing total count in the value profile data. | |||
762 | uint64_t OldSum = 0; | |||
763 | std::unique_ptr<InstrProfValueData[]> ValueData = | |||
764 | std::make_unique<InstrProfValueData[]>(MaxNumPromotions); | |||
765 | bool Valid = | |||
766 | getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, | |||
767 | ValueData.get(), NumVals, OldSum, true); | |||
768 | ||||
769 | DenseMap<uint64_t, uint64_t> ValueCountMap; | |||
770 | if (Sum == 0) { | |||
771 | assert((CallTargets.size() == 1 &&(static_cast <bool> ((CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && "If sum is 0, assume only one element in CallTargets " "with count being NOMORE_ICP_MAGICNUM") ? void (0) : __assert_fail ("(CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && \"If sum is 0, assume only one element in CallTargets \" \"with count being NOMORE_ICP_MAGICNUM\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 774, __extension__ __PRETTY_FUNCTION__)) | |||
772 | CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&(static_cast <bool> ((CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && "If sum is 0, assume only one element in CallTargets " "with count being NOMORE_ICP_MAGICNUM") ? void (0) : __assert_fail ("(CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && \"If sum is 0, assume only one element in CallTargets \" \"with count being NOMORE_ICP_MAGICNUM\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 774, __extension__ __PRETTY_FUNCTION__)) | |||
773 | "If sum is 0, assume only one element in CallTargets "(static_cast <bool> ((CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && "If sum is 0, assume only one element in CallTargets " "with count being NOMORE_ICP_MAGICNUM") ? void (0) : __assert_fail ("(CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && \"If sum is 0, assume only one element in CallTargets \" \"with count being NOMORE_ICP_MAGICNUM\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 774, __extension__ __PRETTY_FUNCTION__)) | |||
774 | "with count being NOMORE_ICP_MAGICNUM")(static_cast <bool> ((CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && "If sum is 0, assume only one element in CallTargets " "with count being NOMORE_ICP_MAGICNUM") ? void (0) : __assert_fail ("(CallTargets.size() == 1 && CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && \"If sum is 0, assume only one element in CallTargets \" \"with count being NOMORE_ICP_MAGICNUM\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 774, __extension__ __PRETTY_FUNCTION__)); | |||
775 | // Initialize ValueCountMap with existing value profile data. | |||
776 | if (Valid) { | |||
777 | for (uint32_t I = 0; I < NumVals; I++) | |||
778 | ValueCountMap[ValueData[I].Value] = ValueData[I].Count; | |||
779 | } | |||
780 | auto Pair = | |||
781 | ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count); | |||
782 | // If the target already exists in value profile, decrease the total | |||
783 | // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM. | |||
784 | if (!Pair.second) { | |||
785 | OldSum -= Pair.first->second; | |||
786 | Pair.first->second = NOMORE_ICP_MAGICNUM; | |||
787 | } | |||
788 | Sum = OldSum; | |||
789 | } else { | |||
790 | // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM | |||
791 | // counts in the value profile. | |||
792 | if (Valid) { | |||
793 | for (uint32_t I = 0; I < NumVals; I++) { | |||
794 | if (ValueData[I].Count == NOMORE_ICP_MAGICNUM) | |||
795 | ValueCountMap[ValueData[I].Value] = ValueData[I].Count; | |||
796 | } | |||
797 | } | |||
798 | ||||
799 | for (const auto &Data : CallTargets) { | |||
800 | auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); | |||
801 | if (Pair.second) | |||
802 | continue; | |||
803 | // The target represented by Data.Value has already been promoted. | |||
804 | // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease | |||
805 | // Sum by Data.Count. | |||
806 | assert(Sum >= Data.Count && "Sum should never be less than Data.Count")(static_cast <bool> (Sum >= Data.Count && "Sum should never be less than Data.Count" ) ? void (0) : __assert_fail ("Sum >= Data.Count && \"Sum should never be less than Data.Count\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 806, __extension__ __PRETTY_FUNCTION__)); | |||
807 | Sum -= Data.Count; | |||
808 | } | |||
809 | } | |||
810 | ||||
811 | SmallVector<InstrProfValueData, 8> NewCallTargets; | |||
812 | for (const auto &ValueCount : ValueCountMap) { | |||
813 | NewCallTargets.emplace_back( | |||
814 | InstrProfValueData{ValueCount.first, ValueCount.second}); | |||
815 | } | |||
816 | ||||
817 | llvm::sort(NewCallTargets, | |||
818 | [](const InstrProfValueData &L, const InstrProfValueData &R) { | |||
819 | if (L.Count != R.Count) | |||
820 | return L.Count > R.Count; | |||
821 | return L.Value > R.Value; | |||
822 | }); | |||
823 | ||||
824 | uint32_t MaxMDCount = | |||
825 | std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions)); | |||
826 | annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst, | |||
827 | NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount); | |||
828 | } | |||
829 | ||||
830 | /// Attempt to promote indirect call and also inline the promoted call. | |||
831 | /// | |||
832 | /// \param F Caller function. | |||
833 | /// \param Candidate ICP and inline candidate. | |||
834 | /// \param SumOrigin Original sum of target counts for indirect call before | |||
835 | /// promoting given candidate. | |||
836 | /// \param Sum Prorated sum of remaining target counts for indirect call | |||
837 | /// after promoting given candidate. | |||
838 | /// \param InlinedCallSite Output vector for new call sites exposed after | |||
839 | /// inlining. | |||
840 | bool SampleProfileLoader::tryPromoteAndInlineCandidate( | |||
841 | Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, | |||
842 | SmallVector<CallBase *, 8> *InlinedCallSite) { | |||
843 | auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName(); | |||
844 | auto R = SymbolMap.find(CalleeFunctionName); | |||
845 | if (R == SymbolMap.end() || !R->getValue()) | |||
846 | return false; | |||
847 | ||||
848 | auto &CI = *Candidate.CallInstr; | |||
849 | if (!doesHistoryAllowICP(CI, R->getValue()->getName())) | |||
850 | return false; | |||
851 | ||||
852 | const char *Reason = "Callee function not available"; | |||
853 | // R->getValue() != &F is to prevent promoting a recursive call. | |||
854 | // If it is a recursive call, we do not inline it as it could bloat | |||
855 | // the code exponentially. There is way to better handle this, e.g. | |||
856 | // clone the caller first, and inline the cloned caller if it is | |||
857 | // recursive. As llvm does not inline recursive calls, we will | |||
858 | // simply ignore it instead of handling it explicitly. | |||
859 | if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && | |||
860 | R->getValue()->hasFnAttribute("use-sample-profile") && | |||
861 | R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) { | |||
862 | // For promoted target, set its value with NOMORE_ICP_MAGICNUM count | |||
863 | // in the value profile metadata so the target won't be promoted again. | |||
864 | SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{ | |||
865 | Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}}; | |||
866 | updateIDTMetaData(CI, SortedCallTargets, 0); | |||
867 | ||||
868 | auto *DI = &pgo::promoteIndirectCall( | |||
869 | CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE); | |||
870 | if (DI) { | |||
871 | Sum -= Candidate.CallsiteCount; | |||
872 | // Do not prorate the indirect callsite distribution since the original | |||
873 | // distribution will be used to scale down non-promoted profile target | |||
874 | // counts later. By doing this we lose track of the real callsite count | |||
875 | // for the leftover indirect callsite as a trade off for accurate call | |||
876 | // target counts. | |||
877 | // TODO: Ideally we would have two separate factors, one for call site | |||
878 | // counts and one is used to prorate call target counts. | |||
879 | // Do not update the promoted direct callsite distribution at this | |||
880 | // point since the original distribution combined with the callee profile | |||
881 | // will be used to prorate callsites from the callee if inlined. Once not | |||
882 | // inlined, the direct callsite distribution should be prorated so that | |||
883 | // the it will reflect the real callsite counts. | |||
884 | Candidate.CallInstr = DI; | |||
885 | if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) { | |||
886 | bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); | |||
887 | if (!Inlined) { | |||
888 | // Prorate the direct callsite distribution so that it reflects real | |||
889 | // callsite counts. | |||
890 | setProbeDistributionFactor( | |||
891 | *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin); | |||
892 | } | |||
893 | return Inlined; | |||
894 | } | |||
895 | } | |||
896 | } else { | |||
897 | LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false) | |||
898 | << Candidate.CalleeSamples->getFuncName() << " because "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false) | |||
899 | << Reason << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nFailed to promote indirect call to " << Candidate.CalleeSamples->getFuncName() << " because " << Reason << "\n"; } } while (false); | |||
900 | } | |||
901 | return false; | |||
902 | } | |||
903 | ||||
904 | bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { | |||
905 | if (!ProfileSizeInline) | |||
906 | return false; | |||
907 | ||||
908 | Function *Callee = CallInst.getCalledFunction(); | |||
909 | if (Callee == nullptr) | |||
910 | return false; | |||
911 | ||||
912 | InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), | |||
913 | GetAC, GetTLI); | |||
914 | ||||
915 | if (Cost.isNever()) | |||
916 | return false; | |||
917 | ||||
918 | if (Cost.isAlways()) | |||
919 | return true; | |||
920 | ||||
921 | return Cost.getCost() <= SampleColdCallSiteThreshold; | |||
922 | } | |||
923 | ||||
924 | void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( | |||
925 | const SmallVectorImpl<CallBase *> &Candidates, const Function &F, | |||
926 | bool Hot) { | |||
927 | for (auto I : Candidates) { | |||
928 | Function *CalledFunction = I->getCalledFunction(); | |||
929 | if (CalledFunction) { | |||
930 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineAttempt", | |||
931 | I->getDebugLoc(), I->getParent()) | |||
932 | << "previous inlining reattempted for " | |||
933 | << (Hot ? "hotness: '" : "size: '") | |||
934 | << ore::NV("Callee", CalledFunction) << "' into '" | |||
935 | << ore::NV("Caller", &F) << "'"); | |||
936 | } | |||
937 | } | |||
938 | } | |||
939 | ||||
940 | void SampleProfileLoader::findExternalInlineCandidate( | |||
941 | const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs, | |||
942 | const StringMap<Function *> &SymbolMap, uint64_t Threshold) { | |||
943 | assert(Samples && "expect non-null caller profile")(static_cast <bool> (Samples && "expect non-null caller profile" ) ? void (0) : __assert_fail ("Samples && \"expect non-null caller profile\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 943, __extension__ __PRETTY_FUNCTION__)); | |||
944 | ||||
945 | // For AutoFDO profile, retrieve candidate profiles by walking over | |||
946 | // the nested inlinee profiles. | |||
947 | if (!ProfileIsCS) { | |||
948 | Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold); | |||
949 | return; | |||
950 | } | |||
951 | ||||
952 | ContextTrieNode *Caller = | |||
953 | ContextTracker->getContextFor(Samples->getContext()); | |||
954 | std::queue<ContextTrieNode *> CalleeList; | |||
955 | CalleeList.push(Caller); | |||
956 | while (!CalleeList.empty()) { | |||
957 | ContextTrieNode *Node = CalleeList.front(); | |||
958 | CalleeList.pop(); | |||
959 | FunctionSamples *CalleeSample = Node->getFunctionSamples(); | |||
960 | // For CSSPGO profile, retrieve candidate profile by walking over the | |||
961 | // trie built for context profile. Note that also take call targets | |||
962 | // even if callee doesn't have a corresponding context profile. | |||
963 | if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold) | |||
964 | continue; | |||
965 | ||||
966 | StringRef Name = CalleeSample->getFuncName(); | |||
967 | Function *Func = SymbolMap.lookup(Name); | |||
968 | // Add to the import list only when it's defined out of module. | |||
969 | if (!Func || Func->isDeclaration()) | |||
970 | InlinedGUIDs.insert(FunctionSamples::getGUID(Name)); | |||
971 | ||||
972 | // Import hot CallTargets, which may not be available in IR because full | |||
973 | // profile annotation cannot be done until backend compilation in ThinLTO. | |||
974 | for (const auto &BS : CalleeSample->getBodySamples()) | |||
975 | for (const auto &TS : BS.second.getCallTargets()) | |||
976 | if (TS.getValue() > Threshold) { | |||
977 | StringRef CalleeName = CalleeSample->getFuncName(TS.getKey()); | |||
978 | const Function *Callee = SymbolMap.lookup(CalleeName); | |||
979 | if (!Callee || Callee->isDeclaration()) | |||
980 | InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName)); | |||
981 | } | |||
982 | ||||
983 | // Import hot child context profile associted with callees. Note that this | |||
984 | // may have some overlap with the call target loop above, but doing this | |||
985 | // based child context profile again effectively allow us to use the max of | |||
986 | // entry count and call target count to determine importing. | |||
987 | for (auto &Child : Node->getAllChildContext()) { | |||
988 | ContextTrieNode *CalleeNode = &Child.second; | |||
989 | CalleeList.push(CalleeNode); | |||
990 | } | |||
991 | } | |||
992 | } | |||
993 | ||||
994 | /// Iteratively inline hot callsites of a function. | |||
995 | /// | |||
996 | /// Iteratively traverse all callsites of the function \p F, and find if | |||
997 | /// the corresponding inlined instance exists and is hot in profile. If | |||
998 | /// it is hot enough, inline the callsites and adds new callsites of the | |||
999 | /// callee into the caller. If the call is an indirect call, first promote | |||
1000 | /// it to direct call. Each indirect call is limited with a single target. | |||
1001 | /// | |||
1002 | /// \param F function to perform iterative inlining. | |||
1003 | /// \param InlinedGUIDs a set to be updated to include all GUIDs that are | |||
1004 | /// inlined in the profiled binary. | |||
1005 | /// | |||
1006 | /// \returns True if there is any inline happened. | |||
1007 | bool SampleProfileLoader::inlineHotFunctions( | |||
1008 | Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { | |||
1009 | // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure | |||
1010 | // Profile symbol list is ignored when profile-sample-accurate is on. | |||
1011 | assert((!ProfAccForSymsInList ||(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1015, __extension__ __PRETTY_FUNCTION__)) | |||
1012 | (!ProfileSampleAccurate &&(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1015, __extension__ __PRETTY_FUNCTION__)) | |||
1013 | !F.hasFnAttribute("profile-sample-accurate"))) &&(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1015, __extension__ __PRETTY_FUNCTION__)) | |||
1014 | "ProfAccForSymsInList should be false when profile-sample-accurate "(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1015, __extension__ __PRETTY_FUNCTION__)) | |||
1015 | "is enabled")(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1015, __extension__ __PRETTY_FUNCTION__)); | |||
1016 | ||||
1017 | DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; | |||
1018 | bool Changed = false; | |||
1019 | bool LocalChanged = true; | |||
1020 | while (LocalChanged) { | |||
1021 | LocalChanged = false; | |||
1022 | SmallVector<CallBase *, 10> CIS; | |||
1023 | for (auto &BB : F) { | |||
1024 | bool Hot = false; | |||
1025 | SmallVector<CallBase *, 10> AllCandidates; | |||
1026 | SmallVector<CallBase *, 10> ColdCandidates; | |||
1027 | for (auto &I : BB.getInstList()) { | |||
1028 | const FunctionSamples *FS = nullptr; | |||
1029 | if (auto *CB = dyn_cast<CallBase>(&I)) { | |||
1030 | if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) { | |||
1031 | assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&(static_cast <bool> ((!FunctionSamples::UseMD5 || FS-> GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated" ) ? void (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1032, __extension__ __PRETTY_FUNCTION__)) | |||
1032 | "GUIDToFuncNameMap has to be populated")(static_cast <bool> ((!FunctionSamples::UseMD5 || FS-> GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated" ) ? void (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1032, __extension__ __PRETTY_FUNCTION__)); | |||
1033 | AllCandidates.push_back(CB); | |||
1034 | if (FS->getEntrySamples() > 0 || ProfileIsCS) | |||
1035 | LocalNotInlinedCallSites.try_emplace(CB, FS); | |||
1036 | if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) | |||
1037 | Hot = true; | |||
1038 | else if (shouldInlineColdCallee(*CB)) | |||
1039 | ColdCandidates.push_back(CB); | |||
1040 | } | |||
1041 | } | |||
1042 | } | |||
1043 | if (Hot || ExternalInlineAdvisor) { | |||
1044 | CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); | |||
1045 | emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); | |||
1046 | } else { | |||
1047 | CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end()); | |||
1048 | emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false); | |||
1049 | } | |||
1050 | } | |||
1051 | for (CallBase *I : CIS) { | |||
1052 | Function *CalledFunction = I->getCalledFunction(); | |||
1053 | InlineCandidate Candidate = { | |||
1054 | I, | |||
1055 | LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] | |||
1056 | : nullptr, | |||
1057 | 0 /* dummy count */, 1.0 /* dummy distribution factor */}; | |||
1058 | // Do not inline recursive calls. | |||
1059 | if (CalledFunction == &F) | |||
1060 | continue; | |||
1061 | if (I->isIndirectCall()) { | |||
1062 | uint64_t Sum; | |||
1063 | for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { | |||
1064 | uint64_t SumOrigin = Sum; | |||
1065 | if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1066 | findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap, | |||
1067 | PSI->getOrCompHotCountThreshold()); | |||
1068 | continue; | |||
1069 | } | |||
1070 | if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) | |||
1071 | continue; | |||
1072 | ||||
1073 | Candidate = {I, FS, FS->getEntrySamples(), 1.0}; | |||
1074 | if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { | |||
1075 | LocalNotInlinedCallSites.erase(I); | |||
1076 | LocalChanged = true; | |||
1077 | } | |||
1078 | } | |||
1079 | } else if (CalledFunction && CalledFunction->getSubprogram() && | |||
1080 | !CalledFunction->isDeclaration()) { | |||
1081 | if (tryInlineCandidate(Candidate)) { | |||
1082 | LocalNotInlinedCallSites.erase(I); | |||
1083 | LocalChanged = true; | |||
1084 | } | |||
1085 | } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1086 | findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs, | |||
1087 | SymbolMap, | |||
1088 | PSI->getOrCompHotCountThreshold()); | |||
1089 | } | |||
1090 | } | |||
1091 | Changed |= LocalChanged; | |||
1092 | } | |||
1093 | ||||
1094 | // For CS profile, profile for not inlined context will be merged when | |||
1095 | // base profile is being retrieved | |||
1096 | if (ProfileIsCS) | |||
1097 | return Changed; | |||
1098 | ||||
1099 | // Accumulate not inlined callsite information into notInlinedSamples | |||
1100 | for (const auto &Pair : LocalNotInlinedCallSites) { | |||
1101 | CallBase *I = Pair.getFirst(); | |||
1102 | Function *Callee = I->getCalledFunction(); | |||
1103 | if (!Callee || Callee->isDeclaration()) | |||
1104 | continue; | |||
1105 | ||||
1106 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "NotInline", | |||
1107 | I->getDebugLoc(), I->getParent()) | |||
1108 | << "previous inlining not repeated: '" | |||
1109 | << ore::NV("Callee", Callee) << "' into '" | |||
1110 | << ore::NV("Caller", &F) << "'"); | |||
1111 | ||||
1112 | ++NumCSNotInlined; | |||
1113 | const FunctionSamples *FS = Pair.getSecond(); | |||
1114 | if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { | |||
1115 | continue; | |||
1116 | } | |||
1117 | ||||
1118 | if (ProfileMergeInlinee) { | |||
1119 | // A function call can be replicated by optimizations like callsite | |||
1120 | // splitting or jump threading and the replicates end up sharing the | |||
1121 | // sample nested callee profile instead of slicing the original inlinee's | |||
1122 | // profile. We want to do merge exactly once by filtering out callee | |||
1123 | // profiles with a non-zero head sample count. | |||
1124 | if (FS->getHeadSamples() == 0) { | |||
1125 | // Use entry samples as head samples during the merge, as inlinees | |||
1126 | // don't have head samples. | |||
1127 | const_cast<FunctionSamples *>(FS)->addHeadSamples( | |||
1128 | FS->getEntrySamples()); | |||
1129 | ||||
1130 | // Note that we have to do the merge right after processing function. | |||
1131 | // This allows OutlineFS's profile to be used for annotation during | |||
1132 | // top-down processing of functions' annotation. | |||
1133 | FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); | |||
1134 | OutlineFS->merge(*FS); | |||
1135 | } | |||
1136 | } else { | |||
1137 | auto pair = | |||
1138 | notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); | |||
1139 | pair.first->second.entryCount += FS->getEntrySamples(); | |||
1140 | } | |||
1141 | } | |||
1142 | return Changed; | |||
1143 | } | |||
1144 | ||||
1145 | bool SampleProfileLoader::tryInlineCandidate( | |||
1146 | InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) { | |||
1147 | ||||
1148 | CallBase &CB = *Candidate.CallInstr; | |||
1149 | Function *CalledFunction = CB.getCalledFunction(); | |||
1150 | assert(CalledFunction && "Expect a callee with definition")(static_cast <bool> (CalledFunction && "Expect a callee with definition" ) ? void (0) : __assert_fail ("CalledFunction && \"Expect a callee with definition\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1150, __extension__ __PRETTY_FUNCTION__)); | |||
1151 | DebugLoc DLoc = CB.getDebugLoc(); | |||
1152 | BasicBlock *BB = CB.getParent(); | |||
1153 | ||||
1154 | InlineCost Cost = shouldInlineCandidate(Candidate); | |||
1155 | if (Cost.isNever()) { | |||
1156 | ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineFail", DLoc, BB) | |||
1157 | << "incompatible inlining"); | |||
1158 | return false; | |||
1159 | } | |||
1160 | ||||
1161 | if (!Cost) | |||
1162 | return false; | |||
1163 | ||||
1164 | InlineFunctionInfo IFI(nullptr, GetAC); | |||
1165 | IFI.UpdateProfile = false; | |||
1166 | if (InlineFunction(CB, IFI).isSuccess()) { | |||
1167 | // The call to InlineFunction erases I, so we can't pass it here. | |||
1168 | emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, | |||
1169 | true, CSINLINE_DEBUG"sample-profile" "-inline"); | |||
1170 | ||||
1171 | // Now populate the list of newly exposed call sites. | |||
1172 | if (InlinedCallSites) { | |||
1173 | InlinedCallSites->clear(); | |||
1174 | for (auto &I : IFI.InlinedCallSites) | |||
1175 | InlinedCallSites->push_back(I); | |||
1176 | } | |||
1177 | ||||
1178 | if (ProfileIsCS) | |||
1179 | ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); | |||
1180 | ++NumCSInlined; | |||
1181 | ||||
1182 | // Prorate inlined probes for a duplicated inlining callsite which probably | |||
1183 | // has a distribution less than 100%. Samples for an inlinee should be | |||
1184 | // distributed among the copies of the original callsite based on each | |||
1185 | // callsite's distribution factor for counts accuracy. Note that an inlined | |||
1186 | // probe may come with its own distribution factor if it has been duplicated | |||
1187 | // in the inlinee body. The two factor are multiplied to reflect the | |||
1188 | // aggregation of duplication. | |||
1189 | if (Candidate.CallsiteDistribution < 1) { | |||
1190 | for (auto &I : IFI.InlinedCallSites) { | |||
1191 | if (Optional<PseudoProbe> Probe = extractProbe(*I)) | |||
1192 | setProbeDistributionFactor(*I, Probe->Factor * | |||
1193 | Candidate.CallsiteDistribution); | |||
1194 | } | |||
1195 | NumDuplicatedInlinesite++; | |||
1196 | } | |||
1197 | ||||
1198 | return true; | |||
1199 | } | |||
1200 | return false; | |||
1201 | } | |||
1202 | ||||
1203 | bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, | |||
1204 | CallBase *CB) { | |||
1205 | assert(CB && "Expect non-null call instruction")(static_cast <bool> (CB && "Expect non-null call instruction" ) ? void (0) : __assert_fail ("CB && \"Expect non-null call instruction\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1205, __extension__ __PRETTY_FUNCTION__)); | |||
1206 | ||||
1207 | if (isa<IntrinsicInst>(CB)) | |||
1208 | return false; | |||
1209 | ||||
1210 | // Find the callee's profile. For indirect call, find hottest target profile. | |||
1211 | const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB); | |||
1212 | if (!CalleeSamples) | |||
1213 | return false; | |||
1214 | ||||
1215 | float Factor = 1.0; | |||
1216 | if (Optional<PseudoProbe> Probe = extractProbe(*CB)) | |||
1217 | Factor = Probe->Factor; | |||
1218 | ||||
1219 | uint64_t CallsiteCount = 0; | |||
1220 | ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent()); | |||
1221 | if (Weight) | |||
1222 | CallsiteCount = Weight.get(); | |||
1223 | if (CalleeSamples) | |||
1224 | CallsiteCount = std::max( | |||
1225 | CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); | |||
1226 | ||||
1227 | *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; | |||
1228 | return true; | |||
1229 | } | |||
1230 | ||||
1231 | InlineCost | |||
1232 | SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { | |||
1233 | std::unique_ptr<InlineAdvice> Advice = nullptr; | |||
1234 | if (ExternalInlineAdvisor) { | |||
1235 | Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); | |||
1236 | if (!Advice->isInliningRecommended()) { | |||
1237 | Advice->recordUnattemptedInlining(); | |||
1238 | return InlineCost::getNever("not previously inlined"); | |||
1239 | } | |||
1240 | Advice->recordInlining(); | |||
1241 | return InlineCost::getAlways("previously inlined"); | |||
1242 | } | |||
1243 | ||||
1244 | // Adjust threshold based on call site hotness, only do this for callsite | |||
1245 | // prioritized inliner because otherwise cost-benefit check is done earlier. | |||
1246 | int SampleThreshold = SampleColdCallSiteThreshold; | |||
1247 | if (CallsitePrioritizedInline) { | |||
1248 | if (Candidate.CallsiteCount > PSI->getHotCountThreshold()) | |||
1249 | SampleThreshold = SampleHotCallSiteThreshold; | |||
1250 | else if (!ProfileSizeInline) | |||
1251 | return InlineCost::getNever("cold callsite"); | |||
1252 | } | |||
1253 | ||||
1254 | Function *Callee = Candidate.CallInstr->getCalledFunction(); | |||
1255 | assert(Callee && "Expect a definition for inline candidate of direct call")(static_cast <bool> (Callee && "Expect a definition for inline candidate of direct call" ) ? void (0) : __assert_fail ("Callee && \"Expect a definition for inline candidate of direct call\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1255, __extension__ __PRETTY_FUNCTION__)); | |||
1256 | ||||
1257 | InlineParams Params = getInlineParams(); | |||
1258 | Params.ComputeFullInlineCost = true; | |||
1259 | // Checks if there is anything in the reachable portion of the callee at | |||
1260 | // this callsite that makes this inlining potentially illegal. Need to | |||
1261 | // set ComputeFullInlineCost, otherwise getInlineCost may return early | |||
1262 | // when cost exceeds threshold without checking all IRs in the callee. | |||
1263 | // The acutal cost does not matter because we only checks isNever() to | |||
1264 | // see if it is legal to inline the callsite. | |||
1265 | InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, | |||
1266 | GetTTI(*Callee), GetAC, GetTLI); | |||
1267 | ||||
1268 | // Honor always inline and never inline from call analyzer | |||
1269 | if (Cost.isNever() || Cost.isAlways()) | |||
1270 | return Cost; | |||
1271 | ||||
1272 | // For old FDO inliner, we inline the call site as long as cost is not | |||
1273 | // "Never". The cost-benefit check is done earlier. | |||
1274 | if (!CallsitePrioritizedInline) { | |||
1275 | return InlineCost::get(Cost.getCost(), INT_MAX2147483647); | |||
1276 | } | |||
1277 | ||||
1278 | // Otherwise only use the cost from call analyzer, but overwite threshold with | |||
1279 | // Sample PGO threshold. | |||
1280 | return InlineCost::get(Cost.getCost(), SampleThreshold); | |||
1281 | } | |||
1282 | ||||
1283 | bool SampleProfileLoader::inlineHotFunctionsWithPriority( | |||
1284 | Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { | |||
1285 | assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now")(static_cast <bool> (ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now" ) ? void (0) : __assert_fail ("ProfileIsCS && \"Prioritiy based inliner only works with CSSPGO now\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1285, __extension__ __PRETTY_FUNCTION__)); | |||
| ||||
1286 | ||||
1287 | // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure | |||
1288 | // Profile symbol list is ignored when profile-sample-accurate is on. | |||
1289 | assert((!ProfAccForSymsInList ||(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1293, __extension__ __PRETTY_FUNCTION__)) | |||
1290 | (!ProfileSampleAccurate &&(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1293, __extension__ __PRETTY_FUNCTION__)) | |||
1291 | !F.hasFnAttribute("profile-sample-accurate"))) &&(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1293, __extension__ __PRETTY_FUNCTION__)) | |||
1292 | "ProfAccForSymsInList should be false when profile-sample-accurate "(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1293, __extension__ __PRETTY_FUNCTION__)) | |||
1293 | "is enabled")(static_cast <bool> ((!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled") ? void (0) : __assert_fail ("(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1293, __extension__ __PRETTY_FUNCTION__)); | |||
1294 | ||||
1295 | // Populating worklist with initial call sites from root inliner, along | |||
1296 | // with call site weights. | |||
1297 | CandidateQueue CQueue; | |||
1298 | InlineCandidate NewCandidate; | |||
1299 | for (auto &BB : F) { | |||
1300 | for (auto &I : BB.getInstList()) { | |||
1301 | auto *CB = dyn_cast<CallBase>(&I); | |||
1302 | if (!CB) | |||
1303 | continue; | |||
1304 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1305 | CQueue.push(NewCandidate); | |||
1306 | } | |||
1307 | } | |||
1308 | ||||
1309 | // Cap the size growth from profile guided inlining. This is needed even | |||
1310 | // though cost of each inline candidate already accounts for callee size, | |||
1311 | // because with top-down inlining, we can grow inliner size significantly | |||
1312 | // with large number of smaller inlinees each pass the cost check. | |||
1313 | assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&(static_cast <bool> (ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? void (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1315, __extension__ __PRETTY_FUNCTION__)) | |||
1314 | "Max inline size limit should not be smaller than min inline size "(static_cast <bool> (ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? void (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1315, __extension__ __PRETTY_FUNCTION__)) | |||
1315 | "limit.")(static_cast <bool> (ProfileInlineLimitMax >= ProfileInlineLimitMin && "Max inline size limit should not be smaller than min inline size " "limit.") ? void (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1315, __extension__ __PRETTY_FUNCTION__)); | |||
1316 | unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit; | |||
1317 | SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); | |||
1318 | SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); | |||
1319 | if (ExternalInlineAdvisor) | |||
1320 | SizeLimit = std::numeric_limits<unsigned>::max(); | |||
1321 | ||||
1322 | // Perform iterative BFS call site prioritized inlining | |||
1323 | bool Changed = false; | |||
1324 | while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { | |||
1325 | InlineCandidate Candidate = CQueue.top(); | |||
1326 | CQueue.pop(); | |||
1327 | CallBase *I = Candidate.CallInstr; | |||
1328 | Function *CalledFunction = I->getCalledFunction(); | |||
1329 | ||||
1330 | if (CalledFunction == &F) | |||
1331 | continue; | |||
1332 | if (I->isIndirectCall()) { | |||
1333 | uint64_t Sum; | |||
1334 | auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); | |||
1335 | uint64_t SumOrigin = Sum; | |||
| ||||
1336 | Sum *= Candidate.CallsiteDistribution; | |||
1337 | for (const auto *FS : CalleeSamples) { | |||
1338 | // TODO: Consider disable pre-lTO ICP for MonoLTO as well | |||
1339 | if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1340 | findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap, | |||
1341 | PSI->getOrCompHotCountThreshold()); | |||
1342 | continue; | |||
1343 | } | |||
1344 | uint64_t EntryCountDistributed = | |||
1345 | FS->getEntrySamples() * Candidate.CallsiteDistribution; | |||
1346 | // In addition to regular inline cost check, we also need to make sure | |||
1347 | // ICP isn't introducing excessive speculative checks even if individual | |||
1348 | // target looks beneficial to promote and inline. That means we should | |||
1349 | // only do ICP when there's a small number dominant targets. | |||
1350 | if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) | |||
1351 | break; | |||
1352 | // TODO: Fix CallAnalyzer to handle all indirect calls. | |||
1353 | // For indirect call, we don't run CallAnalyzer to get InlineCost | |||
1354 | // before actual inlining. This is because we could see two different | |||
1355 | // types from the same definition, which makes CallAnalyzer choke as | |||
1356 | // it's expecting matching parameter type on both caller and callee | |||
1357 | // side. See example from PR18962 for the triggering cases (the bug was | |||
1358 | // fixed, but we generate different types). | |||
1359 | if (!PSI->isHotCount(EntryCountDistributed)) | |||
1360 | break; | |||
1361 | SmallVector<CallBase *, 8> InlinedCallSites; | |||
1362 | // Attach function profile for promoted indirect callee, and update | |||
1363 | // call site count for the promoted inline candidate too. | |||
1364 | Candidate = {I, FS, EntryCountDistributed, | |||
1365 | Candidate.CallsiteDistribution}; | |||
1366 | if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, | |||
1367 | &InlinedCallSites)) { | |||
1368 | for (auto *CB : InlinedCallSites) { | |||
1369 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1370 | CQueue.emplace(NewCandidate); | |||
1371 | } | |||
1372 | Changed = true; | |||
1373 | } | |||
1374 | } | |||
1375 | } else if (CalledFunction && CalledFunction->getSubprogram() && | |||
1376 | !CalledFunction->isDeclaration()) { | |||
1377 | SmallVector<CallBase *, 8> InlinedCallSites; | |||
1378 | if (tryInlineCandidate(Candidate, &InlinedCallSites)) { | |||
1379 | for (auto *CB : InlinedCallSites) { | |||
1380 | if (getInlineCandidate(&NewCandidate, CB)) | |||
1381 | CQueue.emplace(NewCandidate); | |||
1382 | } | |||
1383 | Changed = true; | |||
1384 | } | |||
1385 | } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { | |||
1386 | findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs, | |||
1387 | SymbolMap, PSI->getOrCompHotCountThreshold()); | |||
1388 | } | |||
1389 | } | |||
1390 | ||||
1391 | if (!CQueue.empty()) { | |||
1392 | if (SizeLimit == (unsigned)ProfileInlineLimitMax) | |||
1393 | ++NumCSInlinedHitMaxLimit; | |||
1394 | else if (SizeLimit == (unsigned)ProfileInlineLimitMin) | |||
1395 | ++NumCSInlinedHitMinLimit; | |||
1396 | else | |||
1397 | ++NumCSInlinedHitGrowthLimit; | |||
1398 | } | |||
1399 | ||||
1400 | return Changed; | |||
1401 | } | |||
1402 | ||||
1403 | /// Returns the sorted CallTargetMap \p M by count in descending order. | |||
1404 | static SmallVector<InstrProfValueData, 2> | |||
1405 | GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) { | |||
1406 | SmallVector<InstrProfValueData, 2> R; | |||
1407 | for (const auto &I : SampleRecord::SortCallTargets(M)) { | |||
1408 | R.emplace_back( | |||
1409 | InstrProfValueData{FunctionSamples::getGUID(I.first), I.second}); | |||
1410 | } | |||
1411 | return R; | |||
1412 | } | |||
1413 | ||||
1414 | // Generate MD_prof metadata for every branch instruction using the | |||
1415 | // edge weights computed during propagation. | |||
1416 | void SampleProfileLoader::generateMDProfMetadata(Function &F) { | |||
1417 | // Generate MD_prof metadata for every branch instruction using the | |||
1418 | // edge weights computed during propagation. | |||
1419 | LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nPropagation complete. Setting branch weights\n" ; } } while (false); | |||
1420 | LLVMContext &Ctx = F.getContext(); | |||
1421 | MDBuilder MDB(Ctx); | |||
1422 | for (auto &BI : F) { | |||
1423 | BasicBlock *BB = &BI; | |||
1424 | ||||
1425 | if (BlockWeights[BB]) { | |||
1426 | for (auto &I : BB->getInstList()) { | |||
1427 | if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) | |||
1428 | continue; | |||
1429 | if (!cast<CallBase>(I).getCalledFunction()) { | |||
1430 | const DebugLoc &DLoc = I.getDebugLoc(); | |||
1431 | if (!DLoc) | |||
1432 | continue; | |||
1433 | const DILocation *DIL = DLoc; | |||
1434 | const FunctionSamples *FS = findFunctionSamples(I); | |||
1435 | if (!FS) | |||
1436 | continue; | |||
1437 | auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL); | |||
1438 | auto T = FS->findCallTargetMapAt(CallSite); | |||
1439 | if (!T || T.get().empty()) | |||
1440 | continue; | |||
1441 | // Prorate the callsite counts to reflect what is already done to the | |||
1442 | // callsite, such as ICP or calliste cloning. | |||
1443 | if (FunctionSamples::ProfileIsProbeBased) { | |||
1444 | if (Optional<PseudoProbe> Probe = extractProbe(I)) { | |||
1445 | if (Probe->Factor < 1) | |||
1446 | T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); | |||
1447 | } | |||
1448 | } | |||
1449 | SmallVector<InstrProfValueData, 2> SortedCallTargets = | |||
1450 | GetSortedValueDataFromCallTargets(T.get()); | |||
1451 | uint64_t Sum = 0; | |||
1452 | for (const auto &C : T.get()) | |||
1453 | Sum += C.second; | |||
1454 | // With CSSPGO all indirect call targets are counted torwards the | |||
1455 | // original indirect call site in the profile, including both | |||
1456 | // inlined and non-inlined targets. | |||
1457 | if (!FunctionSamples::ProfileIsCS) { | |||
1458 | if (const FunctionSamplesMap *M = | |||
1459 | FS->findFunctionSamplesMapAt(CallSite)) { | |||
1460 | for (const auto &NameFS : *M) | |||
1461 | Sum += NameFS.second.getEntrySamples(); | |||
1462 | } | |||
1463 | } | |||
1464 | if (!Sum) | |||
1465 | continue; | |||
1466 | updateIDTMetaData(I, SortedCallTargets, Sum); | |||
1467 | } else if (!isa<IntrinsicInst>(&I)) { | |||
1468 | I.setMetadata(LLVMContext::MD_prof, | |||
1469 | MDB.createBranchWeights( | |||
1470 | {static_cast<uint32_t>(BlockWeights[BB])})); | |||
1471 | } | |||
1472 | } | |||
1473 | } | |||
1474 | Instruction *TI = BB->getTerminator(); | |||
1475 | if (TI->getNumSuccessors() == 1) | |||
1476 | continue; | |||
1477 | if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) && | |||
1478 | !isa<IndirectBrInst>(TI)) | |||
1479 | continue; | |||
1480 | ||||
1481 | DebugLoc BranchLoc = TI->getDebugLoc(); | |||
1482 | LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1483 | << ((BranchLoc) ? Twine(BranchLoc.getLine())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1484 | : Twine("<UNKNOWN LOCATION>"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false) | |||
1485 | << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\nGetting weights for branch at line " << ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>" )) << ".\n"; } } while (false); | |||
1486 | SmallVector<uint32_t, 4> Weights; | |||
1487 | uint32_t MaxWeight = 0; | |||
1488 | Instruction *MaxDestInst; | |||
1489 | for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { | |||
1490 | BasicBlock *Succ = TI->getSuccessor(I); | |||
1491 | Edge E = std::make_pair(BB, Succ); | |||
1492 | uint64_t Weight = EdgeWeights[E]; | |||
1493 | LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\t"; printEdgeWeight(dbgs (), E); } } while (false); | |||
1494 | // Use uint32_t saturated arithmetic to adjust the incoming weights, | |||
1495 | // if needed. Sample counts in profiles are 64-bit unsigned values, | |||
1496 | // but internally branch weights are expressed as 32-bit values. | |||
1497 | if (Weight > std::numeric_limits<uint32_t>::max()) { | |||
1498 | LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << " (saturated due to uint32_t overflow)" ; } } while (false); | |||
1499 | Weight = std::numeric_limits<uint32_t>::max(); | |||
1500 | } | |||
1501 | // Weight is added by one to avoid propagation errors introduced by | |||
1502 | // 0 weights. | |||
1503 | Weights.push_back(static_cast<uint32_t>(Weight + 1)); | |||
1504 | if (Weight != 0) { | |||
1505 | if (Weight > MaxWeight) { | |||
1506 | MaxWeight = Weight; | |||
1507 | MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime(); | |||
1508 | } | |||
1509 | } | |||
1510 | } | |||
1511 | ||||
1512 | uint64_t TempWeight; | |||
1513 | // Only set weights if there is at least one non-zero weight. | |||
1514 | // In any other case, let the analyzer set weights. | |||
1515 | // Do not set weights if the weights are present. In ThinLTO, the profile | |||
1516 | // annotation is done twice. If the first annotation already set the | |||
1517 | // weights, the second pass does not need to set it. | |||
1518 | if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) { | |||
1519 | LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "SUCCESS. Found non-zero weights.\n" ; } } while (false); | |||
1520 | TI->setMetadata(LLVMContext::MD_prof, | |||
1521 | MDB.createBranchWeights(Weights)); | |||
1522 | ORE->emit([&]() { | |||
1523 | return OptimizationRemark(DEBUG_TYPE"sample-profile", "PopularDest", MaxDestInst) | |||
1524 | << "most popular destination for conditional branches at " | |||
1525 | << ore::NV("CondBranchesLoc", BranchLoc); | |||
1526 | }); | |||
1527 | } else { | |||
1528 | LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "SKIPPED. All branch weights are zero.\n" ; } } while (false); | |||
1529 | } | |||
1530 | } | |||
1531 | } | |||
1532 | ||||
1533 | /// Once all the branch weights are computed, we emit the MD_prof | |||
1534 | /// metadata on BB using the computed values for each of its branches. | |||
1535 | /// | |||
1536 | /// \param F The function to query. | |||
1537 | /// | |||
1538 | /// \returns true if \p F was modified. Returns false, otherwise. | |||
1539 | bool SampleProfileLoader::emitAnnotations(Function &F) { | |||
1540 | bool Changed = false; | |||
1541 | ||||
1542 | if (FunctionSamples::ProfileIsProbeBased) { | |||
1543 | if (!ProbeManager->profileIsValid(F, *Samples)) { | |||
1544 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false) | |||
1545 | dbgs() << "Profile is invalid due to CFG mismatch for Function "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false) | |||
1546 | << F.getName())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName(); } } while (false); | |||
1547 | ++NumMismatchedProfile; | |||
1548 | return false; | |||
1549 | } | |||
1550 | ++NumMatchedProfile; | |||
1551 | } else { | |||
1552 | if (getFunctionLoc(F) == 0) | |||
1553 | return false; | |||
1554 | ||||
1555 | LLVM_DEBUG(dbgs() << "Line number for the first instruction in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F ) << "\n"; } } while (false) | |||
1556 | << F.getName() << ": " << getFunctionLoc(F) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F ) << "\n"; } } while (false); | |||
1557 | } | |||
1558 | ||||
1559 | DenseSet<GlobalValue::GUID> InlinedGUIDs; | |||
1560 | if (ProfileIsCS && CallsitePrioritizedInline) | |||
1561 | Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); | |||
1562 | else | |||
1563 | Changed |= inlineHotFunctions(F, InlinedGUIDs); | |||
1564 | ||||
1565 | Changed |= computeAndPropagateWeights(F, InlinedGUIDs); | |||
1566 | ||||
1567 | if (Changed) | |||
1568 | generateMDProfMetadata(F); | |||
1569 | ||||
1570 | emitCoverageRemarks(F); | |||
1571 | return Changed; | |||
1572 | } | |||
1573 | ||||
// Legacy pass manager registration for SampleProfileLoaderLegacyPass.
// The address of ID is what gets handed to PassInfo to identify the pass.
1574 | char SampleProfileLoaderLegacyPass::ID = 0; | |||
1575 | ||||
// Registers the pass under the argument "sample-profile" with the name
// "Sample Profile loader". The passes named in the INITIALIZE_PASS_DEPENDENCY
// lines are initialized first, then the pass itself is registered once via
// llvm::call_once.
1576 | INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",static void *initializeSampleProfileLoaderLegacyPassPassOnce( PassRegistry &Registry) { | |||
1577 | "Sample Profile loader", false, false)static void *initializeSampleProfileLoaderLegacyPassPassOnce( PassRegistry &Registry) { | |||
1578 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | |||
1579 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | |||
1580 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
1581 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry); | |||
1582 | INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile" , &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SampleProfileLoaderLegacyPass>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag ; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag , initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry )); } | |||
1583 | "Sample Profile loader", false, false)PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile" , &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SampleProfileLoaderLegacyPass>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag ; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag , initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry )); } | |||
1584 | ||||
1585 | std::unique_ptr<ProfiledCallGraph> | |||
1586 | SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { | |||
1587 | std::unique_ptr<ProfiledCallGraph> ProfiledCG; | |||
1588 | if (ProfileIsCS) | |||
1589 | ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker); | |||
1590 | else | |||
1591 | ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles()); | |||
1592 | ||||
1593 | // Add all functions into the profiled call graph even if they are not in | |||
1594 | // the profile. This makes sure functions missing from the profile still | |||
1595 | // gets a chance to be processed. | |||
1596 | for (auto &Node : CG) { | |||
1597 | const auto *F = Node.first; | |||
1598 | if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) | |||
1599 | continue; | |||
1600 | ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F)); | |||
1601 | } | |||
1602 | ||||
1603 | return ProfiledCG; | |||
1604 | } | |||
1605 | ||||
1606 | std::vector<Function *> | |||
1607 | SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { | |||
1608 | std::vector<Function *> FunctionOrderList; | |||
1609 | FunctionOrderList.reserve(M.size()); | |||
1610 | ||||
1611 | if (!ProfileTopDownLoad && UseProfiledCallGraph) | |||
1612 | errs() << "WARNING: -use-profiled-call-graph ignored, should be used " | |||
1613 | "together with -sample-profile-top-down-load.\n"; | |||
1614 | ||||
1615 | if (!ProfileTopDownLoad || CG == nullptr) { | |||
1616 | if (ProfileMergeInlinee) { | |||
1617 | // Disable ProfileMergeInlinee if profile is not loaded in top down order, | |||
1618 | // because the profile for a function may be used for the profile | |||
1619 | // annotation of its outline copy before the profile merging of its | |||
1620 | // non-inlined inline instances, and that is not the way how | |||
1621 | // ProfileMergeInlinee is supposed to work. | |||
1622 | ProfileMergeInlinee = false; | |||
1623 | } | |||
1624 | ||||
1625 | for (Function &F : M) | |||
1626 | if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) | |||
1627 | FunctionOrderList.push_back(&F); | |||
1628 | return FunctionOrderList; | |||
1629 | } | |||
1630 | ||||
1631 | assert(&CG->getModule() == &M)(static_cast <bool> (&CG->getModule() == &M) ? void (0) : __assert_fail ("&CG->getModule() == &M" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1631, __extension__ __PRETTY_FUNCTION__)); | |||
1632 | ||||
1633 | if (UseProfiledCallGraph || | |||
1634 | (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) { | |||
1635 | // Use profiled call edges to augment the top-down order. There are cases | |||
1636 | // that the top-down order computed based on the static call graph doesn't | |||
1637 | // reflect real execution order. For example | |||
1638 | // | |||
1639 | // 1. Incomplete static call graph due to unknown indirect call targets. | |||
1640 | // Adjusting the order by considering indirect call edges from the | |||
1641 | // profile can enable the inlining of indirect call targets by allowing | |||
1642 | // the caller processed before them. | |||
1643 | // 2. Mutual call edges in an SCC. The static processing order computed for | |||
1644 | // an SCC may not reflect the call contexts in the context-sensitive | |||
1645 | // profile, thus may cause potential inlining to be overlooked. The | |||
1646 | // function order in one SCC is being adjusted to a top-down order based | |||
1647 | // on the profile to favor more inlining. This is only a problem with CS | |||
1648 | // profile. | |||
1649 | // 3. Transitive indirect call edges due to inlining. When a callee function | |||
1650 | // (say B) is inlined into into a caller function (say A) in LTO prelink, | |||
1651 | // every call edge originated from the callee B will be transferred to | |||
1652 | // the caller A. If any transferred edge (say A->C) is indirect, the | |||
1653 | // original profiled indirect edge B->C, even if considered, would not | |||
1654 | // enforce a top-down order from the caller A to the potential indirect | |||
1655 | // call target C in LTO postlink since the inlined callee B is gone from | |||
1656 | // the static call graph. | |||
1657 | // 4. #3 can happen even for direct call targets, due to functions defined | |||
1658 | // in header files. A header function (say A), when included into source | |||
1659 | // files, is defined multiple times but only one definition survives due | |||
1660 | // to ODR. Therefore, the LTO prelink inlining done on those dropped | |||
1661 | // definitions can be useless based on a local file scope. More | |||
1662 | // importantly, the inlinee (say B), once fully inlined to a | |||
1663 | // to-be-dropped A, will have no profile to consume when its outlined | |||
1664 | // version is compiled. This can lead to a profile-less prelink | |||
1665 | // compilation for the outlined version of B which may be called from | |||
1666 | // external modules. while this isn't easy to fix, we rely on the | |||
1667 | // postlink AutoFDO pipeline to optimize B. Since the survived copy of | |||
1668 | // the A can be inlined in its local scope in prelink, it may not exist | |||
1669 | // in the merged IR in postlink, and we'll need the profiled call edges | |||
1670 | // to enforce a top-down order for the rest of the functions. | |||
1671 | // | |||
1672 | // Considering those cases, a profiled call graph completely independent of | |||
1673 | // the static call graph is constructed based on profile data, where | |||
1674 | // function objects are not even needed to handle case #3 and case 4. | |||
1675 | // | |||
1676 | // Note that static callgraph edges are completely ignored since they | |||
1677 | // can be conflicting with profiled edges for cyclic SCCs and may result in | |||
1678 | // an SCC order incompatible with profile-defined one. Using strictly | |||
1679 | // profile order ensures a maximum inlining experience. On the other hand, | |||
1680 | // static call edges are not so important when they don't correspond to a | |||
1681 | // context in the profile. | |||
1682 | ||||
1683 | std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG); | |||
1684 | scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get()); | |||
1685 | while (!CGI.isAtEnd()) { | |||
1686 | for (ProfiledCallGraphNode *Node : *CGI) { | |||
1687 | Function *F = SymbolMap.lookup(Node->Name); | |||
1688 | if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) | |||
1689 | FunctionOrderList.push_back(F); | |||
1690 | } | |||
1691 | ++CGI; | |||
1692 | } | |||
1693 | } else { | |||
1694 | scc_iterator<CallGraph *> CGI = scc_begin(CG); | |||
1695 | while (!CGI.isAtEnd()) { | |||
1696 | for (CallGraphNode *Node : *CGI) { | |||
1697 | auto *F = Node->getFunction(); | |||
1698 | if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) | |||
1699 | FunctionOrderList.push_back(F); | |||
1700 | } | |||
1701 | ++CGI; | |||
1702 | } | |||
1703 | } | |||
1704 | ||||
1705 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1706 | dbgs() << "Function processing order:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1707 | for (auto F : reverse(FunctionOrderList)) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1708 | dbgs() << F->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1709 | }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false) | |||
1710 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { { dbgs() << "Function processing order:\n" ; for (auto F : reverse(FunctionOrderList)) { dbgs() << F->getName() << "\n"; } }; } } while (false); | |||
1711 | ||||
1712 | std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); | |||
1713 | return FunctionOrderList; | |||
1714 | } | |||
1715 | ||||
1716 | bool SampleProfileLoader::doInitialization(Module &M, | |||
1717 | FunctionAnalysisManager *FAM) { | |||
1718 | auto &Ctx = M.getContext(); | |||
1719 | ||||
1720 | auto ReaderOrErr = | |||
1721 | SampleProfileReader::create(Filename, Ctx, RemappingFilename); | |||
1722 | if (std::error_code EC = ReaderOrErr.getError()) { | |||
1723 | std::string Msg = "Could not open profile: " + EC.message(); | |||
1724 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1725 | return false; | |||
1726 | } | |||
1727 | Reader = std::move(ReaderOrErr.get()); | |||
1728 | Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink); | |||
1729 | // set module before reading the profile so reader may be able to only | |||
1730 | // read the function profiles which are used by the current module. | |||
1731 | Reader->setModule(&M); | |||
1732 | if (std::error_code EC = Reader->read()) { | |||
1733 | std::string Msg = "profile reading failed: " + EC.message(); | |||
1734 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1735 | return false; | |||
1736 | } | |||
1737 | ||||
1738 | PSL = Reader->getProfileSymbolList(); | |||
1739 | ||||
1740 | // While profile-sample-accurate is on, ignore symbol list. | |||
1741 | ProfAccForSymsInList = | |||
1742 | ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; | |||
1743 | if (ProfAccForSymsInList) { | |||
1744 | NamesInProfile.clear(); | |||
1745 | if (auto NameTable = Reader->getNameTable()) | |||
1746 | NamesInProfile.insert(NameTable->begin(), NameTable->end()); | |||
1747 | CoverageTracker.setProfAccForSymsInList(true); | |||
1748 | } | |||
1749 | ||||
1750 | if (FAM && !ProfileInlineReplayFile.empty()) { | |||
1751 | ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>( | |||
1752 | M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile, | |||
1753 | /*EmitRemarks=*/false); | |||
1754 | if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) | |||
1755 | ExternalInlineAdvisor.reset(); | |||
1756 | } | |||
1757 | ||||
1758 | // Apply tweaks if context-sensitive profile is available. | |||
1759 | if (Reader->profileIsCS()) { | |||
1760 | ProfileIsCS = true; | |||
1761 | FunctionSamples::ProfileIsCS = true; | |||
1762 | ||||
1763 | // Enable priority-base inliner and size inline by default for CSSPGO. | |||
1764 | if (!ProfileSizeInline.getNumOccurrences()) | |||
1765 | ProfileSizeInline = true; | |||
1766 | if (!CallsitePrioritizedInline.getNumOccurrences()) | |||
1767 | CallsitePrioritizedInline = true; | |||
1768 | ||||
1769 | // Tracker for profiles under different context | |||
1770 | ContextTracker = | |||
1771 | std::make_unique<SampleContextTracker>(Reader->getProfiles()); | |||
1772 | } | |||
1773 | ||||
1774 | // Load pseudo probe descriptors for probe-based function samples. | |||
1775 | if (Reader->profileIsProbeBased()) { | |||
1776 | ProbeManager = std::make_unique<PseudoProbeManager>(M); | |||
1777 | if (!ProbeManager->moduleIsProbed(M)) { | |||
1778 | const char *Msg = | |||
1779 | "Pseudo-probe-based profile requires SampleProfileProbePass"; | |||
1780 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); | |||
1781 | return false; | |||
1782 | } | |||
1783 | } | |||
1784 | ||||
1785 | return true; | |||
1786 | } | |||
1787 | ||||
1788 | ModulePass *llvm::createSampleProfileLoaderPass() { | |||
1789 | return new SampleProfileLoaderLegacyPass(); | |||
1790 | } | |||
1791 | ||||
1792 | ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { | |||
1793 | return new SampleProfileLoaderLegacyPass(Name); | |||
1794 | } | |||
1795 | ||||
1796 | bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, | |||
1797 | ProfileSummaryInfo *_PSI, CallGraph *CG) { | |||
1798 | GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); | |||
1799 | ||||
1800 | PSI = _PSI; | |||
1801 | if (M.getProfileSummary(/* IsCS */ false) == nullptr) { | |||
1802 | M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), | |||
1803 | ProfileSummary::PSK_Sample); | |||
1804 | PSI->refresh(); | |||
1805 | } | |||
1806 | // Compute the total number of samples collected in this profile. | |||
1807 | for (const auto &I : Reader->getProfiles()) | |||
1808 | TotalCollectedSamples += I.second.getTotalSamples(); | |||
1809 | ||||
1810 | auto Remapper = Reader->getRemapper(); | |||
1811 | // Populate the symbol map. | |||
1812 | for (const auto &N_F : M.getValueSymbolTable()) { | |||
1813 | StringRef OrigName = N_F.getKey(); | |||
1814 | Function *F = dyn_cast<Function>(N_F.getValue()); | |||
1815 | if (F == nullptr || OrigName.empty()) | |||
1816 | continue; | |||
1817 | SymbolMap[OrigName] = F; | |||
1818 | StringRef NewName = FunctionSamples::getCanonicalFnName(*F); | |||
1819 | if (OrigName != NewName && !NewName.empty()) { | |||
1820 | auto r = SymbolMap.insert(std::make_pair(NewName, F)); | |||
1821 | // Failiing to insert means there is already an entry in SymbolMap, | |||
1822 | // thus there are multiple functions that are mapped to the same | |||
1823 | // stripped name. In this case of name conflicting, set the value | |||
1824 | // to nullptr to avoid confusion. | |||
1825 | if (!r.second) | |||
1826 | r.first->second = nullptr; | |||
1827 | OrigName = NewName; | |||
1828 | } | |||
1829 | // Insert the remapped names into SymbolMap. | |||
1830 | if (Remapper) { | |||
1831 | if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) { | |||
1832 | if (*MapName != OrigName && !MapName->empty()) | |||
1833 | SymbolMap.insert(std::make_pair(*MapName, F)); | |||
1834 | } | |||
1835 | } | |||
1836 | } | |||
1837 | assert(SymbolMap.count(StringRef()) == 0 &&(static_cast <bool> (SymbolMap.count(StringRef()) == 0 && "No empty StringRef should be added in SymbolMap") ? void (0 ) : __assert_fail ("SymbolMap.count(StringRef()) == 0 && \"No empty StringRef should be added in SymbolMap\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1838, __extension__ __PRETTY_FUNCTION__)) | |||
1838 | "No empty StringRef should be added in SymbolMap")(static_cast <bool> (SymbolMap.count(StringRef()) == 0 && "No empty StringRef should be added in SymbolMap") ? void (0 ) : __assert_fail ("SymbolMap.count(StringRef()) == 0 && \"No empty StringRef should be added in SymbolMap\"" , "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1838, __extension__ __PRETTY_FUNCTION__)); | |||
1839 | ||||
1840 | bool retval = false; | |||
1841 | for (auto F : buildFunctionOrder(M, CG)) { | |||
1842 | assert(!F->isDeclaration())(static_cast <bool> (!F->isDeclaration()) ? void (0) : __assert_fail ("!F->isDeclaration()", "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Transforms/IPO/SampleProfile.cpp" , 1842, __extension__ __PRETTY_FUNCTION__)); | |||
1843 | clearFunctionData(); | |||
1844 | retval |= runOnFunction(*F, AM); | |||
1845 | } | |||
1846 | ||||
1847 | // Account for cold calls not inlined.... | |||
1848 | if (!ProfileIsCS) | |||
1849 | for (const std::pair<Function *, NotInlinedProfileInfo> &pair : | |||
1850 | notInlinedCallInfo) | |||
1851 | updateProfileCallee(pair.first, pair.second.entryCount); | |||
1852 | ||||
1853 | return retval; | |||
1854 | } | |||
1855 | ||||
1856 | bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { | |||
1857 | ACT = &getAnalysis<AssumptionCacheTracker>(); | |||
1858 | TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); | |||
1859 | TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); | |||
1860 | ProfileSummaryInfo *PSI = | |||
1861 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); | |||
1862 | return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); | |||
1863 | } | |||
1864 | ||||
1865 | bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { | |||
1866 | LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sample-profile")) { dbgs() << "\n\nProcessing Function " << F.getName() << "\n"; } } while (false); | |||
1867 | DILocation2SampleMap.clear(); | |||
1868 | // By default the entry count is initialized to -1, which will be treated | |||
1869 | // conservatively by getEntryCount as the same as unknown (None). This is | |||
1870 | // to avoid newly added code to be treated as cold. If we have samples | |||
1871 | // this will be overwritten in emitAnnotations. | |||
1872 | uint64_t initialEntryCount = -1; | |||
1873 | ||||
1874 | ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; | |||
1875 | if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { | |||
1876 | // initialize all the function entry counts to 0. It means all the | |||
1877 | // functions without profile will be regarded as cold. | |||
1878 | initialEntryCount = 0; | |||
1879 | // profile-sample-accurate is a user assertion which has a higher precedence | |||
1880 | // than symbol list. When profile-sample-accurate is on, ignore symbol list. | |||
1881 | ProfAccForSymsInList = false; | |||
1882 | } | |||
1883 | CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList); | |||
1884 | ||||
1885 | // PSL -- profile symbol list include all the symbols in sampled binary. | |||
1886 | // If ProfileAccurateForSymsInList is enabled, PSL is used to treat | |||
1887 | // old functions without samples being cold, without having to worry | |||
1888 | // about new and hot functions being mistakenly treated as cold. | |||
1889 | if (ProfAccForSymsInList) { | |||
1890 | // Initialize the entry count to 0 for functions in the list. | |||
1891 | if (PSL->contains(F.getName())) | |||
1892 | initialEntryCount = 0; | |||
1893 | ||||
1894 | // Function in the symbol list but without sample will be regarded as | |||
1895 | // cold. To minimize the potential negative performance impact it could | |||
1896 | // have, we want to be a little conservative here saying if a function | |||
1897 | // shows up in the profile, no matter as outline function, inline instance | |||
1898 | // or call targets, treat the function as not being cold. This will handle | |||
1899 | // the cases such as most callsites of a function are inlined in sampled | |||
1900 | // binary but not inlined in current build (because of source code drift, | |||
1901 | // imprecise debug information, or the callsites are all cold individually | |||
1902 | // but not cold accumulatively...), so the outline function showing up as | |||
1903 | // cold in sampled binary will actually not be cold after current build. | |||
1904 | StringRef CanonName = FunctionSamples::getCanonicalFnName(F); | |||
1905 | if (NamesInProfile.count(CanonName)) | |||
1906 | initialEntryCount = -1; | |||
1907 | } | |||
1908 | ||||
1909 | // Initialize entry count when the function has no existing entry | |||
1910 | // count value. | |||
1911 | if (!F.getEntryCount().hasValue()) | |||
1912 | F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); | |||
1913 | std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; | |||
1914 | if (AM) { | |||
1915 | auto &FAM = | |||
1916 | AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent()) | |||
1917 | .getManager(); | |||
1918 | ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); | |||
1919 | } else { | |||
1920 | OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F); | |||
1921 | ORE = OwnedORE.get(); | |||
1922 | } | |||
1923 | ||||
1924 | if (ProfileIsCS) | |||
1925 | Samples = ContextTracker->getBaseSamplesFor(F); | |||
1926 | else | |||
1927 | Samples = Reader->getSamplesFor(F); | |||
1928 | ||||
1929 | if (Samples && !Samples->empty()) | |||
1930 | return emitAnnotations(F); | |||
1931 | return false; | |||
1932 | } | |||
1933 | ||||
1934 | PreservedAnalyses SampleProfileLoaderPass::run(Module &M, | |||
1935 | ModuleAnalysisManager &AM) { | |||
1936 | FunctionAnalysisManager &FAM = | |||
1937 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
1938 | ||||
1939 | auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { | |||
1940 | return FAM.getResult<AssumptionAnalysis>(F); | |||
1941 | }; | |||
1942 | auto GetTTI = [&](Function &F) -> TargetTransformInfo & { | |||
1943 | return FAM.getResult<TargetIRAnalysis>(F); | |||
1944 | }; | |||
1945 | auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { | |||
1946 | return FAM.getResult<TargetLibraryAnalysis>(F); | |||
1947 | }; | |||
1948 | ||||
1949 | SampleProfileLoader SampleLoader( | |||
1950 | ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, | |||
1951 | ProfileRemappingFileName.empty() ? SampleProfileRemappingFile | |||
1952 | : ProfileRemappingFileName, | |||
1953 | LTOPhase, GetAssumptionCache, GetTTI, GetTLI); | |||
1954 | ||||
1955 | if (!SampleLoader.doInitialization(M, &FAM)) | |||
1956 | return PreservedAnalyses::all(); | |||
1957 | ||||
1958 | ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); | |||
1959 | CallGraph &CG = AM.getResult<CallGraphAnalysis>(M); | |||
1960 | if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) | |||
1961 | return PreservedAnalyses::all(); | |||
1962 | ||||
1963 | return PreservedAnalyses::none(); | |||
1964 | } |