Bug Summary

File:llvm/lib/Transforms/IPO/SampleProfile.cpp
Warning:line 1225, column 7
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SampleProfile.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/build-llvm/lib/Transforms/IPO -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/build-llvm/lib/Transforms/IPO -I /build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO -I /build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/build-llvm/lib/Transforms/IPO -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-03-05-015038-47150-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp
1//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileLoader transformation. This pass
10// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12// profile information in the given profile.
13//
14// This pass generates branch weight annotations on the IR:
15//
16// - prof: Represents branch weights. This annotation is added to branches
17// to indicate the weights of each edge coming out of the branch.
18// The weight of each edge is the weight of the target block for
19// that edge. The weight of a block B is computed as the maximum
20// number of samples found in B.
21//
22//===----------------------------------------------------------------------===//
23
24#include "llvm/Transforms/IPO/SampleProfile.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/None.h"
29#include "llvm/ADT/PriorityQueue.h"
30#include "llvm/ADT/SCCIterator.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringMap.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
38#include "llvm/Analysis/AssumptionCache.h"
39#include "llvm/Analysis/CallGraph.h"
40#include "llvm/Analysis/CallGraphSCCPass.h"
41#include "llvm/Analysis/InlineAdvisor.h"
42#include "llvm/Analysis/InlineCost.h"
43#include "llvm/Analysis/LoopInfo.h"
44#include "llvm/Analysis/OptimizationRemarkEmitter.h"
45#include "llvm/Analysis/PostDominators.h"
46#include "llvm/Analysis/ProfileSummaryInfo.h"
47#include "llvm/Analysis/ReplayInlineAdvisor.h"
48#include "llvm/Analysis/TargetLibraryInfo.h"
49#include "llvm/Analysis/TargetTransformInfo.h"
50#include "llvm/IR/BasicBlock.h"
51#include "llvm/IR/CFG.h"
52#include "llvm/IR/DebugInfoMetadata.h"
53#include "llvm/IR/DebugLoc.h"
54#include "llvm/IR/DiagnosticInfo.h"
55#include "llvm/IR/Dominators.h"
56#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
58#include "llvm/IR/InstrTypes.h"
59#include "llvm/IR/Instruction.h"
60#include "llvm/IR/Instructions.h"
61#include "llvm/IR/IntrinsicInst.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/MDBuilder.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/PassManager.h"
66#include "llvm/IR/ValueSymbolTable.h"
67#include "llvm/InitializePasses.h"
68#include "llvm/Pass.h"
69#include "llvm/ProfileData/InstrProf.h"
70#include "llvm/ProfileData/SampleProf.h"
71#include "llvm/ProfileData/SampleProfReader.h"
72#include "llvm/Support/Casting.h"
73#include "llvm/Support/CommandLine.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/ErrorOr.h"
77#include "llvm/Support/GenericDomTree.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Transforms/IPO.h"
80#include "llvm/Transforms/IPO/SampleContextTracker.h"
81#include "llvm/Transforms/IPO/SampleProfileProbe.h"
82#include "llvm/Transforms/Instrumentation.h"
83#include "llvm/Transforms/Utils/CallPromotionUtils.h"
84#include "llvm/Transforms/Utils/Cloning.h"
85#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
86#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
87#include <algorithm>
88#include <cassert>
89#include <cstdint>
90#include <functional>
91#include <limits>
92#include <map>
93#include <memory>
94#include <queue>
95#include <string>
96#include <system_error>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101using namespace sampleprof;
102using namespace llvm::sampleprofutil;
103using ProfileCount = Function::ProfileCount;
104#define DEBUG_TYPE"sample-profile" "sample-profile"
105#define CSINLINE_DEBUG"sample-profile" "-inline" DEBUG_TYPE"sample-profile" "-inline"
106
107STATISTIC(NumCSInlined,static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined"
, "Number of functions inlined with context sensitive profile"
}
108 "Number of functions inlined with context sensitive profile")static llvm::Statistic NumCSInlined = {"sample-profile", "NumCSInlined"
, "Number of functions inlined with context sensitive profile"
}
;
109STATISTIC(NumCSNotInlined,static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined"
, "Number of functions not inlined with context sensitive profile"
}
110 "Number of functions not inlined with context sensitive profile")static llvm::Statistic NumCSNotInlined = {"sample-profile", "NumCSNotInlined"
, "Number of functions not inlined with context sensitive profile"
}
;
111STATISTIC(NumMismatchedProfile,static llvm::Statistic NumMismatchedProfile = {"sample-profile"
, "NumMismatchedProfile", "Number of functions with CFG mismatched profile"
}
112 "Number of functions with CFG mismatched profile")static llvm::Statistic NumMismatchedProfile = {"sample-profile"
, "NumMismatchedProfile", "Number of functions with CFG mismatched profile"
}
;
113STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile")static llvm::Statistic NumMatchedProfile = {"sample-profile",
"NumMatchedProfile", "Number of functions with CFG matched profile"
}
;
114STATISTIC(NumDuplicatedInlinesite,static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile"
, "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor"
}
115 "Number of inlined callsites with a partial distribution factor")static llvm::Statistic NumDuplicatedInlinesite = {"sample-profile"
, "NumDuplicatedInlinesite", "Number of inlined callsites with a partial distribution factor"
}
;
116
117STATISTIC(NumCSInlinedHitMinLimit,static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile"
, "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit"
}
118 "Number of functions with FDO inline stopped due to min size limit")static llvm::Statistic NumCSInlinedHitMinLimit = {"sample-profile"
, "NumCSInlinedHitMinLimit", "Number of functions with FDO inline stopped due to min size limit"
}
;
119STATISTIC(NumCSInlinedHitMaxLimit,static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile"
, "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit"
}
120 "Number of functions with FDO inline stopped due to max size limit")static llvm::Statistic NumCSInlinedHitMaxLimit = {"sample-profile"
, "NumCSInlinedHitMaxLimit", "Number of functions with FDO inline stopped due to max size limit"
}
;
121STATISTIC(static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile"
, "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit"
}
122 NumCSInlinedHitGrowthLimit,static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile"
, "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit"
}
123 "Number of functions with FDO inline stopped due to growth size limit")static llvm::Statistic NumCSInlinedHitGrowthLimit = {"sample-profile"
, "NumCSInlinedHitGrowthLimit", "Number of functions with FDO inline stopped due to growth size limit"
}
;
124
125// Command line option to specify the file to read samples from. This is
126// mainly used for debugging.
127static cl::opt<std::string> SampleProfileFile(
128 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
129 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
130
131// The named file contains a set of transformations that may have been applied
132// to the symbol names between the program from which the sample data was
133// collected and the current program's symbols.
134static cl::opt<std::string> SampleProfileRemappingFile(
135 "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
136 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
137
138static cl::opt<bool> ProfileSampleAccurate(
139 "profile-sample-accurate", cl::Hidden, cl::init(false),
140 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
141 "callsite and function as having 0 samples. Otherwise, treat "
142 "un-sampled callsites and functions conservatively as unknown. "));
143
144static cl::opt<bool> ProfileAccurateForSymsInList(
145 "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
146 cl::init(true),
147 cl::desc("For symbols in profile symbol list, regard their profiles to "
148 "be accurate. It may be overriden by profile-sample-accurate. "));
149
150static cl::opt<bool> ProfileMergeInlinee(
151 "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
152 cl::desc("Merge past inlinee's profile to outline version if sample "
153 "profile loader decided not to inline a call site. It will "
154 "only be enabled when top-down order of profile loading is "
155 "enabled. "));
156
157static cl::opt<bool> ProfileTopDownLoad(
158 "sample-profile-top-down-load", cl::Hidden, cl::init(true),
159 cl::desc("Do profile annotation and inlining for functions in top-down "
160 "order of call graph during sample profile loading. It only "
161 "works for new pass manager. "));
162
163static cl::opt<bool> UseProfileIndirectCallEdges(
164 "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
165 cl::desc("Considering indirect call samples from profile when top-down "
166 "processing functions. Only CSSPGO is supported."));
167
168static cl::opt<bool> UseProfileTopDownOrder(
169 "use-profile-top-down-order", cl::init(false), cl::Hidden,
170 cl::desc("Process functions in one SCC in a top-down order "
171 "based on the input profile."));
172
173static cl::opt<bool> ProfileSizeInline(
174 "sample-profile-inline-size", cl::Hidden, cl::init(false),
175 cl::desc("Inline cold call sites in profile loader if it's beneficial "
176 "for code size."));
177
178static cl::opt<int> ProfileInlineGrowthLimit(
179 "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
180 cl::desc("The size growth ratio limit for proirity-based sample profile "
181 "loader inlining."));
182
183static cl::opt<int> ProfileInlineLimitMin(
184 "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
185 cl::desc("The lower bound of size growth limit for "
186 "proirity-based sample profile loader inlining."));
187
188static cl::opt<int> ProfileInlineLimitMax(
189 "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
190 cl::desc("The upper bound of size growth limit for "
191 "proirity-based sample profile loader inlining."));
192
193static cl::opt<int> ProfileICPThreshold(
194 "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
195 cl::desc(
196 "Relative hotness threshold for indirect "
197 "call promotion in proirity-based sample profile loader inlining."));
198
199static cl::opt<int> SampleHotCallSiteThreshold(
200 "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
201 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
202 "inlining."));
203
204static cl::opt<bool> CallsitePrioritizedInline(
205 "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
206 cl::init(false),
207 cl::desc("Use call site prioritized inlining for sample profile loader."
208 "Currently only CSSPGO is supported."));
209
210static cl::opt<int> SampleColdCallSiteThreshold(
211 "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
212 cl::desc("Threshold for inlining cold callsites"));
213
214static cl::opt<std::string> ProfileInlineReplayFile(
215 "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
216 cl::desc(
217 "Optimization remarks file containing inline remarks to be replayed "
218 "by inlining from sample profile loader."),
219 cl::Hidden);
220
221extern cl::opt<unsigned> MaxNumPromotions;
222
223namespace {
224
225using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
226using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
227using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
228using EdgeWeightMap = DenseMap<Edge, uint64_t>;
229using BlockEdgeMap =
230 DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
231
232class GUIDToFuncNameMapper {
233public:
234 GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
235 DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
236 : CurrentReader(Reader), CurrentModule(M),
237 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
238 if (!CurrentReader.useMD5())
239 return;
240
241 for (const auto &F : CurrentModule) {
242 StringRef OrigName = F.getName();
243 CurrentGUIDToFuncNameMap.insert(
244 {Function::getGUID(OrigName), OrigName});
245
246 // Local to global var promotion used by optimization like thinlto
247 // will rename the var and add suffix like ".llvm.xxx" to the
248 // original local name. In sample profile, the suffixes of function
249 // names are all stripped. Since it is possible that the mapper is
250 // built in post-thin-link phase and var promotion has been done,
251 // we need to add the substring of function name without the suffix
252 // into the GUIDToFuncNameMap.
253 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
254 if (CanonName != OrigName)
255 CurrentGUIDToFuncNameMap.insert(
256 {Function::getGUID(CanonName), CanonName});
257 }
258
259 // Update GUIDToFuncNameMap for each function including inlinees.
260 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
261 }
262
263 ~GUIDToFuncNameMapper() {
264 if (!CurrentReader.useMD5())
265 return;
266
267 CurrentGUIDToFuncNameMap.clear();
268
269 // Reset GUIDToFuncNameMap for of each function as they're no
270 // longer valid at this point.
271 SetGUIDToFuncNameMapForAll(nullptr);
272 }
273
274private:
275 void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
276 std::queue<FunctionSamples *> FSToUpdate;
277 for (auto &IFS : CurrentReader.getProfiles()) {
278 FSToUpdate.push(&IFS.second);
279 }
280
281 while (!FSToUpdate.empty()) {
282 FunctionSamples *FS = FSToUpdate.front();
283 FSToUpdate.pop();
284 FS->GUIDToFuncNameMap = Map;
285 for (const auto &ICS : FS->getCallsiteSamples()) {
286 const FunctionSamplesMap &FSMap = ICS.second;
287 for (auto &IFS : FSMap) {
288 FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
289 FSToUpdate.push(&FS);
290 }
291 }
292 }
293 }
294
295 SampleProfileReader &CurrentReader;
296 Module &CurrentModule;
297 DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
298};
299
300// Inline candidate used by iterative callsite prioritized inliner
301struct InlineCandidate {
302 CallBase *CallInstr;
303 const FunctionSamples *CalleeSamples;
304 // Prorated callsite count, which will be used to guide inlining. For example,
305 // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
306 // copies will get their own distribution factors and their prorated counts
307 // will be used to decide if they should be inlined independently.
308 uint64_t CallsiteCount;
309 // Call site distribution factor to prorate the profile samples for a
310 // duplicated callsite. Default value is 1.0.
311 float CallsiteDistribution;
312};
313
314// Inline candidate comparer using call site weight
315struct CandidateComparer {
316 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
317 if (LHS.CallsiteCount != RHS.CallsiteCount)
318 return LHS.CallsiteCount < RHS.CallsiteCount;
319
320 // Tie breaker using GUID so we have stable/deterministic inlining order
321 assert(LHS.CalleeSamples && RHS.CalleeSamples &&((LHS.CalleeSamples && RHS.CalleeSamples && "Expect non-null FunctionSamples"
) ? static_cast<void> (0) : __assert_fail ("LHS.CalleeSamples && RHS.CalleeSamples && \"Expect non-null FunctionSamples\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 322, __PRETTY_FUNCTION__))
322 "Expect non-null FunctionSamples")((LHS.CalleeSamples && RHS.CalleeSamples && "Expect non-null FunctionSamples"
) ? static_cast<void> (0) : __assert_fail ("LHS.CalleeSamples && RHS.CalleeSamples && \"Expect non-null FunctionSamples\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 322, __PRETTY_FUNCTION__))
;
323 return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
324 RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
325 }
326};
327
328using CandidateQueue =
329 PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
330 CandidateComparer>;
331
332/// Sample profile pass.
333///
334/// This pass reads profile data from the file specified by
335/// -sample-profile-file and annotates every affected function with the
336/// profile information found in that file.
337class SampleProfileLoader final
338 : public SampleProfileLoaderBaseImpl<BasicBlock> {
339public:
340 SampleProfileLoader(
341 StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
342 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
343 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
344 std::function<const TargetLibraryInfo &(Function &)> GetTLI)
345 : SampleProfileLoaderBaseImpl(std::string(Name)),
346 GetAC(std::move(GetAssumptionCache)),
347 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
348 RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
349
350 bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
351 bool runOnModule(Module &M, ModuleAnalysisManager *AM,
352 ProfileSummaryInfo *_PSI, CallGraph *CG);
353
354protected:
355 bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
356 bool emitAnnotations(Function &F);
357 ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
358 ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
359 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
360 const FunctionSamples *
361 findFunctionSamples(const Instruction &I) const override;
362 std::vector<const FunctionSamples *>
363 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
364 // Attempt to promote indirect call and also inline the promoted call
365 bool tryPromoteAndInlineCandidate(
366 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
367 uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
368 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
369 bool inlineHotFunctions(Function &F,
370 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
371 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
372 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
373 bool
374 tryInlineCandidate(InlineCandidate &Candidate,
375 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
376 bool
377 inlineHotFunctionsWithPriority(Function &F,
378 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
379 // Inline cold/small functions in addition to hot ones
380 bool shouldInlineColdCallee(CallBase &CallInst);
381 void emitOptimizationRemarksForInlineCandidates(
382 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
383 bool Hot);
384 std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
385 void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
386 void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
387 void generateMDProfMetadata(Function &F);
388
389 /// Map from function name to Function *. Used to find the function from
390 /// the function name. If the function name contains suffix, additional
391 /// entry is added to map from the stripped name to the function if there
392 /// is one-to-one mapping.
393 StringMap<Function *> SymbolMap;
394
395 std::function<AssumptionCache &(Function &)> GetAC;
396 std::function<TargetTransformInfo &(Function &)> GetTTI;
397 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
398
399 /// Profile tracker for different context.
400 std::unique_ptr<SampleContextTracker> ContextTracker;
401
402 /// Name of the profile remapping file to load.
403 std::string RemappingFilename;
404
405 /// Flag indicating whether the profile input loaded successfully.
406 bool ProfileIsValid = false;
407
408 /// Flag indicating whether input profile is context-sensitive
409 bool ProfileIsCS = false;
410
411 /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
412 ///
413 /// We need to know the LTO phase because for example in ThinLTOPrelink
414 /// phase, in annotation, we should not promote indirect calls. Instead,
415 /// we will mark GUIDs that needs to be annotated to the function.
416 ThinOrFullLTOPhase LTOPhase;
417
418 /// Profle Symbol list tells whether a function name appears in the binary
419 /// used to generate the current profile.
420 std::unique_ptr<ProfileSymbolList> PSL;
421
422 /// Total number of samples collected in this profile.
423 ///
424 /// This is the sum of all the samples collected in all the functions executed
425 /// at runtime.
426 uint64_t TotalCollectedSamples = 0;
427
428 // Information recorded when we declined to inline a call site
429 // because we have determined it is too cold is accumulated for
430 // each callee function. Initially this is just the entry count.
431 struct NotInlinedProfileInfo {
432 uint64_t entryCount;
433 };
434 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
435
436 // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
437 // all the function symbols defined or declared in current module.
438 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
439
440 // All the Names used in FunctionSamples including outline function
441 // names, inline instance names and call target names.
442 StringSet<> NamesInProfile;
443
444 // For symbol in profile symbol list, whether to regard their profiles
445 // to be accurate. It is mainly decided by existance of profile symbol
446 // list and -profile-accurate-for-symsinlist flag, but it can be
447 // overriden by -profile-sample-accurate or profile-sample-accurate
448 // attribute.
449 bool ProfAccForSymsInList;
450
451 // External inline advisor used to replay inline decision from remarks.
452 std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
453
454 // A pseudo probe helper to correlate the imported sample counts.
455 std::unique_ptr<PseudoProbeManager> ProbeManager;
456};
457
458class SampleProfileLoaderLegacyPass : public ModulePass {
459public:
460 // Class identification, replacement for typeinfo
461 static char ID;
462
463 SampleProfileLoaderLegacyPass(
464 StringRef Name = SampleProfileFile,
465 ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
466 : ModulePass(ID), SampleLoader(
467 Name, SampleProfileRemappingFile, LTOPhase,
468 [&](Function &F) -> AssumptionCache & {
469 return ACT->getAssumptionCache(F);
470 },
471 [&](Function &F) -> TargetTransformInfo & {
472 return TTIWP->getTTI(F);
473 },
474 [&](Function &F) -> TargetLibraryInfo & {
475 return TLIWP->getTLI(F);
476 }) {
477 initializeSampleProfileLoaderLegacyPassPass(
478 *PassRegistry::getPassRegistry());
479 }
480
481 void dump() { SampleLoader.dump(); }
482
483 bool doInitialization(Module &M) override {
484 return SampleLoader.doInitialization(M);
485 }
486
487 StringRef getPassName() const override { return "Sample profile pass"; }
488 bool runOnModule(Module &M) override;
489
490 void getAnalysisUsage(AnalysisUsage &AU) const override {
491 AU.addRequired<AssumptionCacheTracker>();
492 AU.addRequired<TargetTransformInfoWrapperPass>();
493 AU.addRequired<TargetLibraryInfoWrapperPass>();
494 AU.addRequired<ProfileSummaryInfoWrapperPass>();
495 }
496
497private:
498 SampleProfileLoader SampleLoader;
499 AssumptionCacheTracker *ACT = nullptr;
500 TargetTransformInfoWrapperPass *TTIWP = nullptr;
501 TargetLibraryInfoWrapperPass *TLIWP = nullptr;
502};
503
504} // end anonymous namespace
505
506ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
507 if (FunctionSamples::ProfileIsProbeBased)
508 return getProbeWeight(Inst);
509
510 const DebugLoc &DLoc = Inst.getDebugLoc();
511 if (!DLoc)
512 return std::error_code();
513
514 // Ignore all intrinsics, phinodes and branch instructions.
515 // Branch and phinodes instruction usually contains debug info from sources
516 // outside of the residing basic block, thus we ignore them during annotation.
517 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
518 return std::error_code();
519
520 // If a direct call/invoke instruction is inlined in profile
521 // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
522 // it means that the inlined callsite has no sample, thus the call
523 // instruction should have 0 count.
524 if (!ProfileIsCS)
525 if (const auto *CB = dyn_cast<CallBase>(&Inst))
526 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
527 return 0;
528
529 return getInstWeightImpl(Inst);
530}
531
532ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
533 assert(FunctionSamples::ProfileIsProbeBased &&((FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based"
) ? static_cast<void> (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 534, __PRETTY_FUNCTION__))
534 "Profile is not pseudo probe based")((FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based"
) ? static_cast<void> (0) : __assert_fail ("FunctionSamples::ProfileIsProbeBased && \"Profile is not pseudo probe based\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 534, __PRETTY_FUNCTION__))
;
535 Optional<PseudoProbe> Probe = extractProbe(Inst);
536 if (!Probe)
537 return std::error_code();
538
539 // Ignore danling probes since they are logically deleted and should not
540 // consume any profile samples.
541 if (Probe->isDangling())
542 return std::error_code();
543
544 const FunctionSamples *FS = findFunctionSamples(Inst);
545 if (!FS)
546 return std::error_code();
547
548 // If a direct call/invoke instruction is inlined in profile
549 // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
550 // it means that the inlined callsite has no sample, thus the call
551 // instruction should have 0 count.
552 if (const auto *CB = dyn_cast<CallBase>(&Inst))
553 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
554 return 0;
555
556 const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
557 if (R) {
558 uint64_t Samples = R.get() * Probe->Factor;
559 bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
560 if (FirstMark) {
561 ORE->emit([&]() {
562 OptimizationRemarkAnalysis Remark(DEBUG_TYPE"sample-profile", "AppliedSamples", &Inst);
563 Remark << "Applied " << ore::NV("NumSamples", Samples);
564 Remark << " samples from profile (ProbeId=";
565 Remark << ore::NV("ProbeId", Probe->Id);
566 Remark << ", Factor=";
567 Remark << ore::NV("Factor", Probe->Factor);
568 Remark << ", OriginalSamples=";
569 Remark << ore::NV("OriginalSamples", R.get());
570 Remark << ")";
571 return Remark;
572 });
573 }
574 LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Instdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << " " << Probe->
Id << ":" << Inst << " - weight: " <<
R.get() << " - factor: " << format("%0.2f", Probe
->Factor) << ")\n"; } } while (false)
575 << " - weight: " << R.get() << " - factor: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << " " << Probe->
Id << ":" << Inst << " - weight: " <<
R.get() << " - factor: " << format("%0.2f", Probe
->Factor) << ")\n"; } } while (false)
576 << format("%0.2f", Probe->Factor) << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << " " << Probe->
Id << ":" << Inst << " - weight: " <<
R.get() << " - factor: " << format("%0.2f", Probe
->Factor) << ")\n"; } } while (false)
;
577 return Samples;
578 }
579 return R;
580}
581
582/// Get the FunctionSamples for a call instruction.
583///
584/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
585/// instance in which that call instruction is calling to. It contains
586/// all samples that resides in the inlined instance. We first find the
587/// inlined instance in which the call instruction is from, then we
588/// traverse its children to find the callsite with the matching
589/// location.
590///
591/// \param Inst Call/Invoke instruction to query.
592///
593/// \returns The FunctionSamples pointer to the inlined instance.
594const FunctionSamples *
595SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
596 const DILocation *DIL = Inst.getDebugLoc();
597 if (!DIL) {
598 return nullptr;
599 }
600
601 StringRef CalleeName;
602 if (Function *Callee = Inst.getCalledFunction())
603 CalleeName = FunctionSamples::getCanonicalFnName(*Callee);
604
605 if (ProfileIsCS)
606 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
607
608 const FunctionSamples *FS = findFunctionSamples(Inst);
609 if (FS == nullptr)
610 return nullptr;
611
612 return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
613 CalleeName, Reader->getRemapper());
614}
615
616/// Returns a vector of FunctionSamples that are the indirect call targets
617/// of \p Inst. The vector is sorted by the total number of samples. Stores
618/// the total call count of the indirect call in \p Sum.
619std::vector<const FunctionSamples *>
620SampleProfileLoader::findIndirectCallFunctionSamples(
621 const Instruction &Inst, uint64_t &Sum) const {
622 const DILocation *DIL = Inst.getDebugLoc();
623 std::vector<const FunctionSamples *> R;
624
625 if (!DIL) {
20
Assuming 'DIL' is null
21
Taking true branch
626 return R;
22
Returning without writing to 'Sum'
627 }
628
629 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
630 assert(L && R && "Expect non-null FunctionSamples")((L && R && "Expect non-null FunctionSamples"
) ? static_cast<void> (0) : __assert_fail ("L && R && \"Expect non-null FunctionSamples\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 630, __PRETTY_FUNCTION__))
;
631 if (L->getEntrySamples() != R->getEntrySamples())
632 return L->getEntrySamples() > R->getEntrySamples();
633 return FunctionSamples::getGUID(L->getName()) <
634 FunctionSamples::getGUID(R->getName());
635 };
636
637 if (ProfileIsCS) {
638 auto CalleeSamples =
639 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
640 if (CalleeSamples.empty())
641 return R;
642
643 // For CSSPGO, we only use target context profile's entry count
644 // as that already includes both inlined callee and non-inlined ones..
645 Sum = 0;
646 for (const auto *const FS : CalleeSamples) {
647 Sum += FS->getEntrySamples();
648 R.push_back(FS);
649 }
650 llvm::sort(R, FSCompare);
651 return R;
652 }
653
654 const FunctionSamples *FS = findFunctionSamples(Inst);
655 if (FS == nullptr)
656 return R;
657
658 auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
659 auto T = FS->findCallTargetMapAt(CallSite);
660 Sum = 0;
661 if (T)
662 for (const auto &T_C : T.get())
663 Sum += T_C.second;
664 if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
665 if (M->empty())
666 return R;
667 for (const auto &NameFS : *M) {
668 Sum += NameFS.second.getEntrySamples();
669 R.push_back(&NameFS.second);
670 }
671 llvm::sort(R, FSCompare);
672 }
673 return R;
674}
675
676const FunctionSamples *
677SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
678 if (FunctionSamples::ProfileIsProbeBased) {
679 Optional<PseudoProbe> Probe = extractProbe(Inst);
680 if (!Probe)
681 return nullptr;
682 }
683
684 const DILocation *DIL = Inst.getDebugLoc();
685 if (!DIL)
686 return Samples;
687
688 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
689 if (it.second) {
690 if (ProfileIsCS)
691 it.first->second = ContextTracker->getContextSamplesFor(DIL);
692 else
693 it.first->second =
694 Samples->findFunctionSamples(DIL, Reader->getRemapper());
695 }
696 return it.first->second;
697}
698
699/// If the profile count for the promotion candidate \p Candidate is 0,
700/// it means \p Candidate has already been promoted for \p Inst.
701static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) {
702 uint32_t NumVals = 0;
703 uint64_t TotalCount = 0;
704 std::unique_ptr<InstrProfValueData[]> ValueData =
705 std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
706 bool Valid =
707 getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
708 ValueData.get(), NumVals, TotalCount, true);
709 if (Valid) {
710 for (uint32_t I = 0; I < NumVals; I++) {
711 // If the promotion candidate has 0 count in the metadata, it
712 // means the candidate has been promoted for this indirect call.
713 if (ValueData[I].Value == Function::getGUID(Candidate))
714 return ValueData[I].Count == 0;
715 }
716 }
717 return false;
718}
719
720/// Update indirect call target profile metadata for \p Inst. If \p Total
721/// is given, set TotalCount of call targets counts to \p Total, otherwise
722/// keep the original value in metadata.
723static void
724updateIDTMetaData(Instruction &Inst,
725 const SmallVectorImpl<InstrProfValueData> &CallTargets,
726 uint64_t Total = 0) {
727 DenseMap<uint64_t, uint64_t> ValueCountMap;
728
729 uint32_t NumVals = 0;
730 uint64_t TotalCount = 0;
731 std::unique_ptr<InstrProfValueData[]> ValueData =
732 std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
733 bool Valid =
734 getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
735 ValueData.get(), NumVals, TotalCount, true);
736 if (Valid) {
737 for (uint32_t I = 0; I < NumVals; I++)
738 ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
739 }
740
741 for (const auto &Data : CallTargets) {
742 auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
743 if (Pair.second)
744 continue;
745 // Update existing profile count of the call target if it is not 0.
746 // If it is 0, the call target has been promoted so keep it as 0.
747 if (Pair.first->second != 0)
748 Pair.first->second = Data.Count;
749 else {
750 assert(Total >= Data.Count && "Total should be >= Data.Count")((Total >= Data.Count && "Total should be >= Data.Count"
) ? static_cast<void> (0) : __assert_fail ("Total >= Data.Count && \"Total should be >= Data.Count\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 750, __PRETTY_FUNCTION__))
;
751 Total -= Data.Count;
752 }
753 }
754
755 SmallVector<InstrProfValueData, 8> NewCallTargets;
756 for (const auto &ValueCount : ValueCountMap) {
757 NewCallTargets.emplace_back(
758 InstrProfValueData{ValueCount.first, ValueCount.second});
759 }
760 llvm::sort(NewCallTargets,
761 [](const InstrProfValueData &L, const InstrProfValueData &R) {
762 if (L.Count != R.Count)
763 return L.Count > R.Count;
764 return L.Value > R.Value;
765 });
766 annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
767 NewCallTargets, Total ? Total : TotalCount,
768 IPVK_IndirectCallTarget, NewCallTargets.size());
769}
770
771/// Attempt to promote indirect call and also inline the promoted call.
772///
773/// \param F Caller function.
774/// \param Candidate ICP and inline candidate.
775/// \param Sum Sum of target counts for indirect call.
776/// \param PromotedInsns Map to keep track of indirect call already processed.
777/// \param InlinedCallSite Output vector for new call sites exposed after
778/// inlining.
779bool SampleProfileLoader::tryPromoteAndInlineCandidate(
780 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
781 DenseSet<Instruction *> &PromotedInsns,
782 SmallVector<CallBase *, 8> *InlinedCallSite) {
783 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
784 auto R = SymbolMap.find(CalleeFunctionName);
785 if (R == SymbolMap.end() || !R->getValue())
786 return false;
787
788 auto &CI = *Candidate.CallInstr;
789 if (isPromotedBefore(CI, R->getValue()->getName()))
790 return false;
791
792 const char *Reason = "Callee function not available";
793 // R->getValue() != &F is to prevent promoting a recursive call.
794 // If it is a recursive call, we do not inline it as it could bloat
795 // the code exponentially. There is way to better handle this, e.g.
796 // clone the caller first, and inline the cloned caller if it is
797 // recursive. As llvm does not inline recursive calls, we will
798 // simply ignore it instead of handling it explicitly.
799 if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
800 R->getValue()->hasFnAttribute("use-sample-profile") &&
801 R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
802 // For promoted target, save 0 count in the value profile metadata so
803 // the target won't be promoted again.
804 SmallVector<InstrProfValueData, 1> SortedCallTargets = {
805 InstrProfValueData{Function::getGUID(R->getValue()->getName()), 0}};
806 updateIDTMetaData(CI, SortedCallTargets);
807
808 auto *DI = &pgo::promoteIndirectCall(
809 CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
810 if (DI) {
811 Sum -= Candidate.CallsiteCount;
812 // Prorate the indirect callsite distribution.
813 // Do not update the promoted direct callsite distribution at this
814 // point since the original distribution combined with the callee
815 // profile will be used to prorate callsites from the callee if
816 // inlined. Once not inlined, the direct callsite distribution should
817 // be prorated so that the it will reflect the real callsite counts.
818 setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum /
819 SumOrigin);
820 PromotedInsns.insert(Candidate.CallInstr);
821 Candidate.CallInstr = DI;
822 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
823 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
824 if (!Inlined) {
825 // Prorate the direct callsite distribution so that it reflects real
826 // callsite counts.
827 setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
828 Candidate.CallsiteCount /
829 SumOrigin);
830 }
831 return Inlined;
832 }
833 }
834 } else {
835 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nFailed to promote indirect call to "
<< Candidate.CalleeSamples->getFuncName() << " because "
<< Reason << "\n"; } } while (false)
836 << Candidate.CalleeSamples->getFuncName() << " because "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nFailed to promote indirect call to "
<< Candidate.CalleeSamples->getFuncName() << " because "
<< Reason << "\n"; } } while (false)
837 << Reason << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nFailed to promote indirect call to "
<< Candidate.CalleeSamples->getFuncName() << " because "
<< Reason << "\n"; } } while (false)
;
838 }
839 return false;
840}
841
842bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
843 if (!ProfileSizeInline)
844 return false;
845
846 Function *Callee = CallInst.getCalledFunction();
847 if (Callee == nullptr)
848 return false;
849
850 InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
851 GetAC, GetTLI);
852
853 if (Cost.isNever())
854 return false;
855
856 if (Cost.isAlways())
857 return true;
858
859 return Cost.getCost() <= SampleColdCallSiteThreshold;
860}
861
862void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
863 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
864 bool Hot) {
865 for (auto I : Candidates) {
866 Function *CalledFunction = I->getCalledFunction();
867 if (CalledFunction) {
868 ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineAttempt",
869 I->getDebugLoc(), I->getParent())
870 << "previous inlining reattempted for "
871 << (Hot ? "hotness: '" : "size: '")
872 << ore::NV("Callee", CalledFunction) << "' into '"
873 << ore::NV("Caller", &F) << "'");
874 }
875 }
876}
877
878/// Iteratively inline hot callsites of a function.
879///
880/// Iteratively traverse all callsites of the function \p F, and find if
881/// the corresponding inlined instance exists and is hot in profile. If
882/// it is hot enough, inline the callsites and adds new callsites of the
883/// callee into the caller. If the call is an indirect call, first promote
884/// it to direct call. Each indirect call is limited with a single target.
885///
886/// \param F function to perform iterative inlining.
887/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
888/// inlined in the profiled binary.
889///
890/// \returns True if there is any inline happened.
891bool SampleProfileLoader::inlineHotFunctions(
892 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
893 DenseSet<Instruction *> PromotedInsns;
894
895 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
896 // Profile symbol list is ignored when profile-sample-accurate is on.
897 assert((!ProfAccForSymsInList ||(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 901, __PRETTY_FUNCTION__))
898 (!ProfileSampleAccurate &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 901, __PRETTY_FUNCTION__))
899 !F.hasFnAttribute("profile-sample-accurate"))) &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 901, __PRETTY_FUNCTION__))
900 "ProfAccForSymsInList should be false when profile-sample-accurate "(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 901, __PRETTY_FUNCTION__))
901 "is enabled")(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 901, __PRETTY_FUNCTION__))
;
902
903 DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
904 bool Changed = false;
905 bool LocalChanged = true;
906 while (LocalChanged) {
907 LocalChanged = false;
908 SmallVector<CallBase *, 10> CIS;
909 for (auto &BB : F) {
910 bool Hot = false;
911 SmallVector<CallBase *, 10> AllCandidates;
912 SmallVector<CallBase *, 10> ColdCandidates;
913 for (auto &I : BB.getInstList()) {
914 const FunctionSamples *FS = nullptr;
915 if (auto *CB = dyn_cast<CallBase>(&I)) {
916 if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
917 assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&(((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated") ? static_cast<void
> (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 918, __PRETTY_FUNCTION__))
918 "GUIDToFuncNameMap has to be populated")(((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated") ? static_cast<void
> (0) : __assert_fail ("(!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && \"GUIDToFuncNameMap has to be populated\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 918, __PRETTY_FUNCTION__))
;
919 AllCandidates.push_back(CB);
920 if (FS->getEntrySamples() > 0 || ProfileIsCS)
921 LocalNotInlinedCallSites.try_emplace(CB, FS);
922 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
923 Hot = true;
924 else if (shouldInlineColdCallee(*CB))
925 ColdCandidates.push_back(CB);
926 }
927 }
928 }
929 if (Hot || ExternalInlineAdvisor) {
930 CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
931 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
932 } else {
933 CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
934 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
935 }
936 }
937 for (CallBase *I : CIS) {
938 Function *CalledFunction = I->getCalledFunction();
939 InlineCandidate Candidate = {
940 I,
941 LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
942 : nullptr,
943 0 /* dummy count */, 1.0 /* dummy distribution factor */};
944 // Do not inline recursive calls.
945 if (CalledFunction == &F)
946 continue;
947 if (I->isIndirectCall()) {
948 if (PromotedInsns.count(I))
949 continue;
950 uint64_t Sum;
951 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
952 uint64_t SumOrigin = Sum;
953 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
954 FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
955 PSI->getOrCompHotCountThreshold());
956 continue;
957 }
958 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
959 continue;
960
961 Candidate = {I, FS, FS->getEntrySamples(), 1.0};
962 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
963 PromotedInsns)) {
964 LocalNotInlinedCallSites.erase(I);
965 LocalChanged = true;
966 }
967 }
968 } else if (CalledFunction && CalledFunction->getSubprogram() &&
969 !CalledFunction->isDeclaration()) {
970 if (tryInlineCandidate(Candidate)) {
971 LocalNotInlinedCallSites.erase(I);
972 LocalChanged = true;
973 }
974 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
975 findCalleeFunctionSamples(*I)->findInlinedFunctions(
976 InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
977 }
978 }
979 Changed |= LocalChanged;
980 }
981
982 // For CS profile, profile for not inlined context will be merged when
983 // base profile is being trieved
984 if (ProfileIsCS)
985 return Changed;
986
987 // Accumulate not inlined callsite information into notInlinedSamples
988 for (const auto &Pair : LocalNotInlinedCallSites) {
989 CallBase *I = Pair.getFirst();
990 Function *Callee = I->getCalledFunction();
991 if (!Callee || Callee->isDeclaration())
992 continue;
993
994 ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "NotInline",
995 I->getDebugLoc(), I->getParent())
996 << "previous inlining not repeated: '"
997 << ore::NV("Callee", Callee) << "' into '"
998 << ore::NV("Caller", &F) << "'");
999
1000 ++NumCSNotInlined;
1001 const FunctionSamples *FS = Pair.getSecond();
1002 if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1003 continue;
1004 }
1005
1006 if (ProfileMergeInlinee) {
1007 // A function call can be replicated by optimizations like callsite
1008 // splitting or jump threading and the replicates end up sharing the
1009 // sample nested callee profile instead of slicing the original inlinee's
1010 // profile. We want to do merge exactly once by filtering out callee
1011 // profiles with a non-zero head sample count.
1012 if (FS->getHeadSamples() == 0) {
1013 // Use entry samples as head samples during the merge, as inlinees
1014 // don't have head samples.
1015 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1016 FS->getEntrySamples());
1017
1018 // Note that we have to do the merge right after processing function.
1019 // This allows OutlineFS's profile to be used for annotation during
1020 // top-down processing of functions' annotation.
1021 FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1022 OutlineFS->merge(*FS);
1023 }
1024 } else {
1025 auto pair =
1026 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1027 pair.first->second.entryCount += FS->getEntrySamples();
1028 }
1029 }
1030 return Changed;
1031}
1032
1033bool SampleProfileLoader::tryInlineCandidate(
1034 InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1035
1036 CallBase &CB = *Candidate.CallInstr;
1037 Function *CalledFunction = CB.getCalledFunction();
1038 assert(CalledFunction && "Expect a callee with definition")((CalledFunction && "Expect a callee with definition"
) ? static_cast<void> (0) : __assert_fail ("CalledFunction && \"Expect a callee with definition\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1038, __PRETTY_FUNCTION__))
;
1039 DebugLoc DLoc = CB.getDebugLoc();
1040 BasicBlock *BB = CB.getParent();
1041
1042 InlineCost Cost = shouldInlineCandidate(Candidate);
1043 if (Cost.isNever()) {
1044 ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG"sample-profile" "-inline", "InlineFail", DLoc, BB)
1045 << "incompatible inlining");
1046 return false;
1047 }
1048
1049 if (!Cost)
1050 return false;
1051
1052 InlineFunctionInfo IFI(nullptr, GetAC);
1053 if (InlineFunction(CB, IFI).isSuccess()) {
1054 // The call to InlineFunction erases I, so we can't pass it here.
1055 emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
1056 true, CSINLINE_DEBUG"sample-profile" "-inline");
1057
1058 // Now populate the list of newly exposed call sites.
1059 if (InlinedCallSites) {
1060 InlinedCallSites->clear();
1061 for (auto &I : IFI.InlinedCallSites)
1062 InlinedCallSites->push_back(I);
1063 }
1064
1065 if (ProfileIsCS)
1066 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1067 ++NumCSInlined;
1068
1069 // Prorate inlined probes for a duplicated inlining callsite which probably
1070 // has a distribution less than 100%. Samples for an inlinee should be
1071 // distributed among the copies of the original callsite based on each
1072 // callsite's distribution factor for counts accuracy. Note that an inlined
1073 // probe may come with its own distribution factor if it has been duplicated
1074 // in the inlinee body. The two factor are multiplied to reflect the
1075 // aggregation of duplication.
1076 if (Candidate.CallsiteDistribution < 1) {
1077 for (auto &I : IFI.InlinedCallSites) {
1078 if (Optional<PseudoProbe> Probe = extractProbe(*I))
1079 setProbeDistributionFactor(*I, Probe->Factor *
1080 Candidate.CallsiteDistribution);
1081 }
1082 NumDuplicatedInlinesite++;
1083 }
1084
1085 return true;
1086 }
1087 return false;
1088}
1089
1090bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1091 CallBase *CB) {
1092 assert(CB && "Expect non-null call instruction")((CB && "Expect non-null call instruction") ? static_cast
<void> (0) : __assert_fail ("CB && \"Expect non-null call instruction\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1092, __PRETTY_FUNCTION__))
;
1093
1094 if (isa<IntrinsicInst>(CB))
1095 return false;
1096
1097 // Find the callee's profile. For indirect call, find hottest target profile.
1098 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1099 if (!CalleeSamples)
1100 return false;
1101
1102 float Factor = 1.0;
1103 if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1104 Factor = Probe->Factor;
1105
1106 uint64_t CallsiteCount = 0;
1107 ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
1108 if (Weight)
1109 CallsiteCount = Weight.get();
1110 if (CalleeSamples)
1111 CallsiteCount = std::max(
1112 CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
1113
1114 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1115 return true;
1116}
1117
1118InlineCost
1119SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1120 std::unique_ptr<InlineAdvice> Advice = nullptr;
1121 if (ExternalInlineAdvisor) {
1122 Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
1123 if (!Advice->isInliningRecommended()) {
1124 Advice->recordUnattemptedInlining();
1125 return InlineCost::getNever("not previously inlined");
1126 }
1127 Advice->recordInlining();
1128 return InlineCost::getAlways("previously inlined");
1129 }
1130
1131 // Adjust threshold based on call site hotness, only do this for callsite
1132 // prioritized inliner because otherwise cost-benefit check is done earlier.
1133 int SampleThreshold = SampleColdCallSiteThreshold;
1134 if (CallsitePrioritizedInline) {
1135 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1136 SampleThreshold = SampleHotCallSiteThreshold;
1137 else if (!ProfileSizeInline)
1138 return InlineCost::getNever("cold callsite");
1139 }
1140
1141 Function *Callee = Candidate.CallInstr->getCalledFunction();
1142 assert(Callee && "Expect a definition for inline candidate of direct call")((Callee && "Expect a definition for inline candidate of direct call"
) ? static_cast<void> (0) : __assert_fail ("Callee && \"Expect a definition for inline candidate of direct call\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1142, __PRETTY_FUNCTION__))
;
1143
1144 InlineParams Params = getInlineParams();
1145 Params.ComputeFullInlineCost = true;
1146 // Checks if there is anything in the reachable portion of the callee at
1147 // this callsite that makes this inlining potentially illegal. Need to
1148 // set ComputeFullInlineCost, otherwise getInlineCost may return early
1149 // when cost exceeds threshold without checking all IRs in the callee.
1150 // The acutal cost does not matter because we only checks isNever() to
1151 // see if it is legal to inline the callsite.
1152 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1153 GetTTI(*Callee), GetAC, GetTLI);
1154
1155 // Honor always inline and never inline from call analyzer
1156 if (Cost.isNever() || Cost.isAlways())
1157 return Cost;
1158
1159 // For old FDO inliner, we inline the call site as long as cost is not
1160 // "Never". The cost-benefit check is done earlier.
1161 if (!CallsitePrioritizedInline) {
1162 return InlineCost::get(Cost.getCost(), INT_MAX2147483647);
1163 }
1164
1165 // Otherwise only use the cost from call analyzer, but overwite threshold with
1166 // Sample PGO threshold.
1167 return InlineCost::get(Cost.getCost(), SampleThreshold);
1168}
1169
1170bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1171 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1172 DenseSet<Instruction *> PromotedInsns;
1173 assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now")((ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"
) ? static_cast<void> (0) : __assert_fail ("ProfileIsCS && \"Prioritiy based inliner only works with CSSPGO now\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1173, __PRETTY_FUNCTION__))
;
1
Assuming field 'ProfileIsCS' is true
2
'?' condition is true
1174
1175 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1176 // Profile symbol list is ignored when profile-sample-accurate is on.
1177 assert((!ProfAccForSymsInList ||(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1181, __PRETTY_FUNCTION__))
3
Assuming field 'ProfAccForSymsInList' is true
4
Assuming the condition is true
5
Assuming the condition is true
6
'?' condition is true
1178 (!ProfileSampleAccurate &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1181, __PRETTY_FUNCTION__))
1179 !F.hasFnAttribute("profile-sample-accurate"))) &&(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1181, __PRETTY_FUNCTION__))
1180 "ProfAccForSymsInList should be false when profile-sample-accurate "(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1181, __PRETTY_FUNCTION__))
1181 "is enabled")(((!ProfAccForSymsInList || (!ProfileSampleAccurate &&
!F.hasFnAttribute("profile-sample-accurate"))) && "ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled") ? static_cast<void> (0) : __assert_fail (
"(!ProfAccForSymsInList || (!ProfileSampleAccurate && !F.hasFnAttribute(\"profile-sample-accurate\"))) && \"ProfAccForSymsInList should be false when profile-sample-accurate \" \"is enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1181, __PRETTY_FUNCTION__))
;
1182
1183 // Populating worklist with initial call sites from root inliner, along
1184 // with call site weights.
1185 CandidateQueue CQueue;
1186 InlineCandidate NewCandidate;
1187 for (auto &BB : F) {
1188 for (auto &I : BB.getInstList()) {
1189 auto *CB = dyn_cast<CallBase>(&I);
1190 if (!CB)
1191 continue;
1192 if (getInlineCandidate(&NewCandidate, CB))
1193 CQueue.push(NewCandidate);
1194 }
1195 }
1196
1197 // Cap the size growth from profile guided inlining. This is needed even
1198 // though cost of each inline candidate already accounts for callee size,
1199 // because with top-down inlining, we can grow inliner size significantly
1200 // with large number of smaller inlinees each pass the cost check.
1201 assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&((ProfileInlineLimitMax >= ProfileInlineLimitMin &&
"Max inline size limit should not be smaller than min inline size "
"limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1203, __PRETTY_FUNCTION__))
7
Assuming the condition is true
8
'?' condition is true
1202 "Max inline size limit should not be smaller than min inline size "((ProfileInlineLimitMax >= ProfileInlineLimitMin &&
"Max inline size limit should not be smaller than min inline size "
"limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1203, __PRETTY_FUNCTION__))
1203 "limit.")((ProfileInlineLimitMax >= ProfileInlineLimitMin &&
"Max inline size limit should not be smaller than min inline size "
"limit.") ? static_cast<void> (0) : __assert_fail ("ProfileInlineLimitMax >= ProfileInlineLimitMin && \"Max inline size limit should not be smaller than min inline size \" \"limit.\""
, "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1203, __PRETTY_FUNCTION__))
;
1204 unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1205 SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
1206 SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
1207 if (ExternalInlineAdvisor)
9
Taking true branch
1208 SizeLimit = std::numeric_limits<unsigned>::max();
1209
1210 // Perform iterative BFS call site prioritized inlining
1211 bool Changed = false;
1212 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
10
Assuming the condition is true
11
Assuming the condition is true
12
Loop condition is true. Entering loop body
1213 InlineCandidate Candidate = CQueue.top();
1214 CQueue.pop();
1215 CallBase *I = Candidate.CallInstr;
1216 Function *CalledFunction = I->getCalledFunction();
1217
1218 if (CalledFunction == &F)
13
Taking false branch
1219 continue;
1220 if (I->isIndirectCall()) {
14
Assuming the condition is true
15
Taking true branch
1221 if (PromotedInsns.count(I))
16
Assuming the condition is false
17
Taking false branch
1222 continue;
1223 uint64_t Sum;
18
'Sum' declared without an initial value
1224 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
19
Calling 'SampleProfileLoader::findIndirectCallFunctionSamples'
23
Returning from 'SampleProfileLoader::findIndirectCallFunctionSamples'
1225 uint64_t SumOrigin = Sum;
24
Assigned value is garbage or undefined
1226 Sum *= Candidate.CallsiteDistribution;
1227 for (const auto *FS : CalleeSamples) {
1228 // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1229 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1230 FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
1231 PSI->getOrCompHotCountThreshold());
1232 continue;
1233 }
1234 uint64_t EntryCountDistributed =
1235 FS->getEntrySamples() * Candidate.CallsiteDistribution;
1236 // In addition to regular inline cost check, we also need to make sure
1237 // ICP isn't introducing excessive speculative checks even if individual
1238 // target looks beneficial to promote and inline. That means we should
1239 // only do ICP when there's a small number dominant targets.
1240 if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
1241 break;
1242 // TODO: Fix CallAnalyzer to handle all indirect calls.
1243 // For indirect call, we don't run CallAnalyzer to get InlineCost
1244 // before actual inlining. This is because we could see two different
1245 // types from the same definition, which makes CallAnalyzer choke as
1246 // it's expecting matching parameter type on both caller and callee
1247 // side. See example from PR18962 for the triggering cases (the bug was
1248 // fixed, but we generate different types).
1249 if (!PSI->isHotCount(EntryCountDistributed))
1250 break;
1251 SmallVector<CallBase *, 8> InlinedCallSites;
1252 // Attach function profile for promoted indirect callee, and update
1253 // call site count for the promoted inline candidate too.
1254 Candidate = {I, FS, EntryCountDistributed,
1255 Candidate.CallsiteDistribution};
1256 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1257 PromotedInsns, &InlinedCallSites)) {
1258 for (auto *CB : InlinedCallSites) {
1259 if (getInlineCandidate(&NewCandidate, CB))
1260 CQueue.emplace(NewCandidate);
1261 }
1262 Changed = true;
1263 }
1264 }
1265 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1266 !CalledFunction->isDeclaration()) {
1267 SmallVector<CallBase *, 8> InlinedCallSites;
1268 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1269 for (auto *CB : InlinedCallSites) {
1270 if (getInlineCandidate(&NewCandidate, CB))
1271 CQueue.emplace(NewCandidate);
1272 }
1273 Changed = true;
1274 }
1275 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1276 findCalleeFunctionSamples(*I)->findInlinedFunctions(
1277 InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
1278 }
1279 }
1280
1281 if (!CQueue.empty()) {
1282 if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1283 ++NumCSInlinedHitMaxLimit;
1284 else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1285 ++NumCSInlinedHitMinLimit;
1286 else
1287 ++NumCSInlinedHitGrowthLimit;
1288 }
1289
1290 return Changed;
1291}
1292
1293/// Returns the sorted CallTargetMap \p M by count in descending order.
1294static SmallVector<InstrProfValueData, 2>
1295GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) {
1296 SmallVector<InstrProfValueData, 2> R;
1297 for (const auto &I : SampleRecord::SortCallTargets(M)) {
1298 R.emplace_back(
1299 InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1300 }
1301 return R;
1302}
1303
1304// Generate MD_prof metadata for every branch instruction using the
1305// edge weights computed during propagation.
1306void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1307 // Generate MD_prof metadata for every branch instruction using the
1308 // edge weights computed during propagation.
1309 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nPropagation complete. Setting branch weights\n"
; } } while (false)
;
1310 LLVMContext &Ctx = F.getContext();
1311 MDBuilder MDB(Ctx);
1312 for (auto &BI : F) {
1313 BasicBlock *BB = &BI;
1314
1315 if (BlockWeights[BB]) {
1316 for (auto &I : BB->getInstList()) {
1317 if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1318 continue;
1319 if (!cast<CallBase>(I).getCalledFunction()) {
1320 const DebugLoc &DLoc = I.getDebugLoc();
1321 if (!DLoc)
1322 continue;
1323 const DILocation *DIL = DLoc;
1324 const FunctionSamples *FS = findFunctionSamples(I);
1325 if (!FS)
1326 continue;
1327 auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
1328 auto T = FS->findCallTargetMapAt(CallSite);
1329 if (!T || T.get().empty())
1330 continue;
1331 // Prorate the callsite counts to reflect what is already done to the
1332 // callsite, such as ICP or calliste cloning.
1333 if (FunctionSamples::ProfileIsProbeBased) {
1334 if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1335 if (Probe->Factor < 1)
1336 T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1337 }
1338 }
1339 SmallVector<InstrProfValueData, 2> SortedCallTargets =
1340 GetSortedValueDataFromCallTargets(T.get());
1341 uint64_t Sum = 0;
1342 for (const auto &C : T.get())
1343 Sum += C.second;
1344 // With CSSPGO all indirect call targets are counted torwards the
1345 // original indirect call site in the profile, including both
1346 // inlined and non-inlined targets.
1347 if (!FunctionSamples::ProfileIsCS) {
1348 if (const FunctionSamplesMap *M =
1349 FS->findFunctionSamplesMapAt(CallSite)) {
1350 for (const auto &NameFS : *M)
1351 Sum += NameFS.second.getEntrySamples();
1352 }
1353 }
1354 updateIDTMetaData(I, SortedCallTargets, Sum);
1355 } else if (!isa<IntrinsicInst>(&I)) {
1356 I.setMetadata(LLVMContext::MD_prof,
1357 MDB.createBranchWeights(
1358 {static_cast<uint32_t>(BlockWeights[BB])}));
1359 }
1360 }
1361 }
1362 Instruction *TI = BB->getTerminator();
1363 if (TI->getNumSuccessors() == 1)
1364 continue;
1365 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1366 continue;
1367
1368 DebugLoc BranchLoc = TI->getDebugLoc();
1369 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nGetting weights for branch at line "
<< ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>"
)) << ".\n"; } } while (false)
1370 << ((BranchLoc) ? Twine(BranchLoc.getLine())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nGetting weights for branch at line "
<< ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>"
)) << ".\n"; } } while (false)
1371 : Twine("<UNKNOWN LOCATION>"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nGetting weights for branch at line "
<< ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>"
)) << ".\n"; } } while (false)
1372 << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\nGetting weights for branch at line "
<< ((BranchLoc) ? Twine(BranchLoc.getLine()) : Twine("<UNKNOWN LOCATION>"
)) << ".\n"; } } while (false)
;
1373 SmallVector<uint32_t, 4> Weights;
1374 uint32_t MaxWeight = 0;
1375 Instruction *MaxDestInst;
1376 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1377 BasicBlock *Succ = TI->getSuccessor(I);
1378 Edge E = std::make_pair(BB, Succ);
1379 uint64_t Weight = EdgeWeights[E];
1380 LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\t"; printEdgeWeight(dbgs
(), E); } } while (false)
;
1381 // Use uint32_t saturated arithmetic to adjust the incoming weights,
1382 // if needed. Sample counts in profiles are 64-bit unsigned values,
1383 // but internally branch weights are expressed as 32-bit values.
1384 if (Weight > std::numeric_limits<uint32_t>::max()) {
1385 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << " (saturated due to uint32_t overflow)"
; } } while (false)
;
1386 Weight = std::numeric_limits<uint32_t>::max();
1387 }
1388 // Weight is added by one to avoid propagation errors introduced by
1389 // 0 weights.
1390 Weights.push_back(static_cast<uint32_t>(Weight + 1));
1391 if (Weight != 0) {
1392 if (Weight > MaxWeight) {
1393 MaxWeight = Weight;
1394 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1395 }
1396 }
1397 }
1398
1399 uint64_t TempWeight;
1400 // Only set weights if there is at least one non-zero weight.
1401 // In any other case, let the analyzer set weights.
1402 // Do not set weights if the weights are present. In ThinLTO, the profile
1403 // annotation is done twice. If the first annotation already set the
1404 // weights, the second pass does not need to set it.
1405 if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1406 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "SUCCESS. Found non-zero weights.\n"
; } } while (false)
;
1407 TI->setMetadata(LLVMContext::MD_prof,
1408 MDB.createBranchWeights(Weights));
1409 ORE->emit([&]() {
1410 return OptimizationRemark(DEBUG_TYPE"sample-profile", "PopularDest", MaxDestInst)
1411 << "most popular destination for conditional branches at "
1412 << ore::NV("CondBranchesLoc", BranchLoc);
1413 });
1414 } else {
1415 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "SKIPPED. All branch weights are zero.\n"
; } } while (false)
;
1416 }
1417 }
1418}
1419
1420/// Once all the branch weights are computed, we emit the MD_prof
1421/// metadata on BB using the computed values for each of its branches.
1422///
1423/// \param F The function to query.
1424///
1425/// \returns true if \p F was modified. Returns false, otherwise.
1426bool SampleProfileLoader::emitAnnotations(Function &F) {
1427 bool Changed = false;
1428
1429 if (FunctionSamples::ProfileIsProbeBased) {
1430 if (!ProbeManager->profileIsValid(F, *Samples)) {
1431 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function "
<< F.getName(); } } while (false)
1432 dbgs() << "Profile is invalid due to CFG mismatch for Function "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function "
<< F.getName(); } } while (false)
1433 << F.getName())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "Profile is invalid due to CFG mismatch for Function "
<< F.getName(); } } while (false)
;
1434 ++NumMismatchedProfile;
1435 return false;
1436 }
1437 ++NumMatchedProfile;
1438 } else {
1439 if (getFunctionLoc(F) == 0)
1440 return false;
1441
1442 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "Line number for the first instruction in "
<< F.getName() << ": " << getFunctionLoc(F
) << "\n"; } } while (false)
1443 << F.getName() << ": " << getFunctionLoc(F) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "Line number for the first instruction in "
<< F.getName() << ": " << getFunctionLoc(F
) << "\n"; } } while (false)
;
1444 }
1445
1446 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1447 if (ProfileIsCS && CallsitePrioritizedInline)
1448 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1449 else
1450 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1451
1452 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1453
1454 if (Changed)
1455 generateMDProfMetadata(F);
1456
1457 emitCoverageRemarks(F);
1458 return Changed;
1459}
1460
// Identifier of the legacy pass; its address is handed to PassInfo by the
// registration code below.
1461char SampleProfileLoaderLegacyPass::ID = 0;
1462
// Legacy pass registration. As the expansions show, this defines a once-only
// initializer that first initializes the AssumptionCacheTracker,
// TargetTransformInfoWrapperPass, TargetLibraryInfoWrapperPass and
// ProfileSummaryInfoWrapperPass passes, then registers this pass as
// "sample-profile" ("Sample Profile loader") with the PassRegistry.
1463INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",static void *initializeSampleProfileLoaderLegacyPassPassOnce(
PassRegistry &Registry) {
1464 "Sample Profile loader", false, false)static void *initializeSampleProfileLoaderLegacyPassPassOnce(
PassRegistry &Registry) {
1465INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
1466INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
1467INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
1468INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry);
1469INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile"
, &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SampleProfileLoaderLegacyPass>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag
; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag
, initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry
)); }
1470 "Sample Profile loader", false, false)PassInfo *PI = new PassInfo( "Sample Profile loader", "sample-profile"
, &SampleProfileLoaderLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SampleProfileLoaderLegacyPass>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSampleProfileLoaderLegacyPassPassFlag
; void llvm::initializeSampleProfileLoaderLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSampleProfileLoaderLegacyPassPassFlag
, initializeSampleProfileLoaderLegacyPassPassOnce, std::ref(Registry
)); }
1471
1472// Add inlined profile call edges to the call graph.
1473void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
1474 const FunctionSamples &Samples) {
1475 Function *Caller = SymbolMap.lookup(Samples.getFuncName());
1476 if (!Caller || Caller->isDeclaration())
1477 return;
1478
1479 // Skip non-inlined call edges which are not important since top down inlining
1480 // for non-CS profile is to get more precise profile matching, not to enable
1481 // more inlining.
1482
1483 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1484 for (const auto &InlinedSamples : CallsiteSamples.second) {
1485 Function *Callee = SymbolMap.lookup(InlinedSamples.first);
1486 if (Callee && !Callee->isDeclaration())
1487 CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
1488 addCallGraphEdges(CG, InlinedSamples.second);
1489 }
1490 }
1491}
1492
1493// Replace call graph edges with dynamic call edges from the profile.
1494void SampleProfileLoader::replaceCallGraphEdges(
1495 CallGraph &CG, StringMap<Function *> &SymbolMap) {
1496 // Remove static call edges from the call graph except for the ones from the
1497 // root which make the call graph connected.
1498 for (const auto &Node : CG)
1499 if (Node.second.get() != CG.getExternalCallingNode())
1500 Node.second->removeAllCalledFunctions();
1501
1502 // Add profile call edges to the call graph.
1503 if (ProfileIsCS) {
1504 ContextTracker->addCallGraphEdges(CG, SymbolMap);
1505 } else {
1506 for (const auto &Samples : Reader->getProfiles())
1507 addCallGraphEdges(CG, Samples.second);
1508 }
1509}
1510
1511std::vector<Function *>
1512SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1513 std::vector<Function *> FunctionOrderList;
1514 FunctionOrderList.reserve(M.size());
1515
1516 if (!ProfileTopDownLoad || CG == nullptr) {
1517 if (ProfileMergeInlinee) {
1518 // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1519 // because the profile for a function may be used for the profile
1520 // annotation of its outline copy before the profile merging of its
1521 // non-inlined inline instances, and that is not the way how
1522 // ProfileMergeInlinee is supposed to work.
1523 ProfileMergeInlinee = false;
1524 }
1525
1526 for (Function &F : M)
1527 if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1528 FunctionOrderList.push_back(&F);
1529 return FunctionOrderList;
1530 }
1531
1532 assert(&CG->getModule() == &M)((&CG->getModule() == &M) ? static_cast<void>
(0) : __assert_fail ("&CG->getModule() == &M", "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1532, __PRETTY_FUNCTION__))
;
1533
1534 // Add indirect call edges from profile to augment the static call graph.
1535 // Functions will be processed in a top-down order defined by the static call
1536 // graph. Adjusting the order by considering indirect call edges from the
1537 // profile (which don't exist in the static call graph) can enable the
1538 // inlining of indirect call targets by processing the caller before them.
1539 // TODO: enable this for non-CS profile and fix the counts returning logic to
1540 // have a full support for indirect calls.
1541 if (UseProfileIndirectCallEdges && ProfileIsCS) {
1542 for (auto &Entry : *CG) {
1543 const auto *F = Entry.first;
1544 if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1545 continue;
1546 auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
1547 if (AllContexts.empty())
1548 continue;
1549
1550 for (const auto &BB : *F) {
1551 for (const auto &I : BB.getInstList()) {
1552 const auto *CB = dyn_cast<CallBase>(&I);
1553 if (!CB || !CB->isIndirectCall())
1554 continue;
1555 const DebugLoc &DLoc = I.getDebugLoc();
1556 if (!DLoc)
1557 continue;
1558 auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
1559 for (FunctionSamples *Samples : AllContexts) {
1560 if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
1561 for (const auto &Target : CallTargets.get()) {
1562 Function *Callee = SymbolMap.lookup(Target.first());
1563 if (Callee && !Callee->isDeclaration())
1564 Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
1565 }
1566 }
1567 }
1568 }
1569 }
1570 }
1571 }
1572
1573 // Compute a top-down order the profile which is used to sort functions in
1574 // one SCC later. The static processing order computed for an SCC may not
1575 // reflect the call contexts in the context-sensitive profile, thus may cause
1576 // potential inlining to be overlooked. The function order in one SCC is being
1577 // adjusted to a top-down order based on the profile to favor more inlining.
1578 DenseMap<Function *, uint64_t> ProfileOrderMap;
1579 if (UseProfileTopDownOrder ||
1580 (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
1581 // Create a static call graph. The call edges are not important since they
1582 // will be replaced by dynamic edges from the profile.
1583 CallGraph ProfileCG(M);
1584 replaceCallGraphEdges(ProfileCG, SymbolMap);
1585 scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
1586 uint64_t I = 0;
1587 while (!CGI.isAtEnd()) {
1588 for (CallGraphNode *Node : *CGI) {
1589 if (auto *F = Node->getFunction())
1590 ProfileOrderMap[F] = ++I;
1591 }
1592 ++CGI;
1593 }
1594 }
1595
1596 scc_iterator<CallGraph *> CGI = scc_begin(CG);
1597 while (!CGI.isAtEnd()) {
1598 uint64_t Start = FunctionOrderList.size();
1599 for (CallGraphNode *Node : *CGI) {
1600 auto *F = Node->getFunction();
1601 if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1602 FunctionOrderList.push_back(F);
1603 }
1604
1605 // Sort nodes in SCC based on the profile top-down order.
1606 if (!ProfileOrderMap.empty()) {
1607 std::stable_sort(FunctionOrderList.begin() + Start,
1608 FunctionOrderList.end(),
1609 [&ProfileOrderMap](Function *Left, Function *Right) {
1610 return ProfileOrderMap[Left] < ProfileOrderMap[Right];
1611 });
1612 }
1613
1614 ++CGI;
1615 }
1616
1617 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
1618 dbgs() << "Function processing order:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
1619 for (auto F : reverse(FunctionOrderList)) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
1620 dbgs() << F->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
1621 }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
1622 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { { dbgs() << "Function processing order:\n"
; for (auto F : reverse(FunctionOrderList)) { dbgs() <<
F->getName() << "\n"; } }; } } while (false)
;
1623
1624 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1625 return FunctionOrderList;
1626}
1627
1628bool SampleProfileLoader::doInitialization(Module &M,
1629 FunctionAnalysisManager *FAM) {
1630 auto &Ctx = M.getContext();
1631
1632 auto ReaderOrErr =
1633 SampleProfileReader::create(Filename, Ctx, RemappingFilename);
1634 if (std::error_code EC = ReaderOrErr.getError()) {
1635 std::string Msg = "Could not open profile: " + EC.message();
1636 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1637 return false;
1638 }
1639 Reader = std::move(ReaderOrErr.get());
1640 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1641 Reader->collectFuncsFrom(M);
1642 if (std::error_code EC = Reader->read()) {
1643 std::string Msg = "profile reading failed: " + EC.message();
1644 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1645 return false;
1646 }
1647
1648 PSL = Reader->getProfileSymbolList();
1649
1650 // While profile-sample-accurate is on, ignore symbol list.
1651 ProfAccForSymsInList =
1652 ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
1653 if (ProfAccForSymsInList) {
1654 NamesInProfile.clear();
1655 if (auto NameTable = Reader->getNameTable())
1656 NamesInProfile.insert(NameTable->begin(), NameTable->end());
1657 CoverageTracker.setProfAccForSymsInList(true);
1658 }
1659
1660 if (FAM && !ProfileInlineReplayFile.empty()) {
1661 ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
1662 M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
1663 /*EmitRemarks=*/false);
1664 if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
1665 ExternalInlineAdvisor.reset();
1666 }
1667
1668 // Apply tweaks if context-sensitive profile is available.
1669 if (Reader->profileIsCS()) {
1670 ProfileIsCS = true;
1671 FunctionSamples::ProfileIsCS = true;
1672
1673 // Enable priority-base inliner and size inline by default for CSSPGO.
1674 if (!ProfileSizeInline.getNumOccurrences())
1675 ProfileSizeInline = true;
1676 if (!CallsitePrioritizedInline.getNumOccurrences())
1677 CallsitePrioritizedInline = true;
1678
1679 // Tracker for profiles under different context
1680 ContextTracker =
1681 std::make_unique<SampleContextTracker>(Reader->getProfiles());
1682 }
1683
1684 // Load pseudo probe descriptors for probe-based function samples.
1685 if (Reader->profileIsProbeBased()) {
1686 ProbeManager = std::make_unique<PseudoProbeManager>(M);
1687 if (!ProbeManager->moduleIsProbed(M)) {
1688 const char *Msg =
1689 "Pseudo-probe-based profile requires SampleProfileProbePass";
1690 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1691 return false;
1692 }
1693 }
1694
1695 return true;
1696}
1697
1698ModulePass *llvm::createSampleProfileLoaderPass() {
1699 return new SampleProfileLoaderLegacyPass();
1700}
1701
1702ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
1703 return new SampleProfileLoaderLegacyPass(Name);
1704}
1705
1706bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1707 ProfileSummaryInfo *_PSI, CallGraph *CG) {
1708 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1709
1710 PSI = _PSI;
1711 if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
1712 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1713 ProfileSummary::PSK_Sample);
1714 PSI->refresh();
1715 }
1716 // Compute the total number of samples collected in this profile.
1717 for (const auto &I : Reader->getProfiles())
1718 TotalCollectedSamples += I.second.getTotalSamples();
1719
1720 auto Remapper = Reader->getRemapper();
1721 // Populate the symbol map.
1722 for (const auto &N_F : M.getValueSymbolTable()) {
1723 StringRef OrigName = N_F.getKey();
1724 Function *F = dyn_cast<Function>(N_F.getValue());
1725 if (F == nullptr)
1726 continue;
1727 SymbolMap[OrigName] = F;
1728 auto pos = OrigName.find('.');
1729 if (pos != StringRef::npos) {
1730 StringRef NewName = OrigName.substr(0, pos);
1731 auto r = SymbolMap.insert(std::make_pair(NewName, F));
1732 // Failiing to insert means there is already an entry in SymbolMap,
1733 // thus there are multiple functions that are mapped to the same
1734 // stripped name. In this case of name conflicting, set the value
1735 // to nullptr to avoid confusion.
1736 if (!r.second)
1737 r.first->second = nullptr;
1738 OrigName = NewName;
1739 }
1740 // Insert the remapped names into SymbolMap.
1741 if (Remapper) {
1742 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
1743 if (*MapName == OrigName)
1744 continue;
1745 SymbolMap.insert(std::make_pair(*MapName, F));
1746 }
1747 }
1748 }
1749
1750 bool retval = false;
1751 for (auto F : buildFunctionOrder(M, CG)) {
1752 assert(!F->isDeclaration())((!F->isDeclaration()) ? static_cast<void> (0) : __assert_fail
("!F->isDeclaration()", "/build/llvm-toolchain-snapshot-13~++20210304100658+2f37cdd5699f/llvm/lib/Transforms/IPO/SampleProfile.cpp"
, 1752, __PRETTY_FUNCTION__))
;
1753 clearFunctionData();
1754 retval |= runOnFunction(*F, AM);
1755 }
1756
1757 // Account for cold calls not inlined....
1758 if (!ProfileIsCS)
1759 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1760 notInlinedCallInfo)
1761 updateProfileCallee(pair.first, pair.second.entryCount);
1762
1763 return retval;
1764}
1765
1766bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1767 ACT = &getAnalysis<AssumptionCacheTracker>();
1768 TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1769 TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
1770 ProfileSummaryInfo *PSI =
1771 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1772 return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
1773}
1774
1775bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
1776 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sample-profile")) { dbgs() << "\n\nProcessing Function "
<< F.getName() << "\n"; } } while (false)
;
1777 DILocation2SampleMap.clear();
1778 // By default the entry count is initialized to -1, which will be treated
1779 // conservatively by getEntryCount as the same as unknown (None). This is
1780 // to avoid newly added code to be treated as cold. If we have samples
1781 // this will be overwritten in emitAnnotations.
1782 uint64_t initialEntryCount = -1;
1783
1784 ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
1785 if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
1786 // initialize all the function entry counts to 0. It means all the
1787 // functions without profile will be regarded as cold.
1788 initialEntryCount = 0;
1789 // profile-sample-accurate is a user assertion which has a higher precedence
1790 // than symbol list. When profile-sample-accurate is on, ignore symbol list.
1791 ProfAccForSymsInList = false;
1792 }
1793 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
1794
1795 // PSL -- profile symbol list include all the symbols in sampled binary.
1796 // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
1797 // old functions without samples being cold, without having to worry
1798 // about new and hot functions being mistakenly treated as cold.
1799 if (ProfAccForSymsInList) {
1800 // Initialize the entry count to 0 for functions in the list.
1801 if (PSL->contains(F.getName()))
1802 initialEntryCount = 0;
1803
1804 // Function in the symbol list but without sample will be regarded as
1805 // cold. To minimize the potential negative performance impact it could
1806 // have, we want to be a little conservative here saying if a function
1807 // shows up in the profile, no matter as outline function, inline instance
1808 // or call targets, treat the function as not being cold. This will handle
1809 // the cases such as most callsites of a function are inlined in sampled
1810 // binary but not inlined in current build (because of source code drift,
1811 // imprecise debug information, or the callsites are all cold individually
1812 // but not cold accumulatively...), so the outline function showing up as
1813 // cold in sampled binary will actually not be cold after current build.
1814 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
1815 if (NamesInProfile.count(CanonName))
1816 initialEntryCount = -1;
1817 }
1818
1819 // Initialize entry count when the function has no existing entry
1820 // count value.
1821 if (!F.getEntryCount().hasValue())
1822 F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1823 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1824 if (AM) {
1825 auto &FAM =
1826 AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
1827 .getManager();
1828 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1829 } else {
1830 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
1831 ORE = OwnedORE.get();
1832 }
1833
1834 if (ProfileIsCS)
1835 Samples = ContextTracker->getBaseSamplesFor(F);
1836 else
1837 Samples = Reader->getSamplesFor(F);
1838
1839 if (Samples && !Samples->empty())
1840 return emitAnnotations(F);
1841 return false;
1842}
1843
1844PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
1845 ModuleAnalysisManager &AM) {
1846 FunctionAnalysisManager &FAM =
1847 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1848
1849 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1850 return FAM.getResult<AssumptionAnalysis>(F);
1851 };
1852 auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1853 return FAM.getResult<TargetIRAnalysis>(F);
1854 };
1855 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
1856 return FAM.getResult<TargetLibraryAnalysis>(F);
1857 };
1858
1859 SampleProfileLoader SampleLoader(
1860 ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1861 ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1862 : ProfileRemappingFileName,
1863 LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
1864
1865 if (!SampleLoader.doInitialization(M, &FAM))
1866 return PreservedAnalyses::all();
1867
1868 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1869 CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
1870 if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
1871 return PreservedAnalyses::all();
1872
1873 return PreservedAnalyses::none();
1874}