LLVM 19.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileLoader transformation. This pass
10// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12// profile information in the given profile.
13//
14// This pass generates branch weight annotations on the IR:
15//
16// - prof: Represents branch weights. This annotation is added to branches
17// to indicate the weights of each edge coming out of the branch.
18// The weight of each edge is the weight of the target block for
19// that edge. The weight of a block B is computed as the maximum
20// number of samples found in B.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/ADT/StringMap.h"
34#include "llvm/ADT/StringRef.h"
35#include "llvm/ADT/Twine.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/DebugLoc.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalValue.h"
51#include "llvm/IR/InstrTypes.h"
52#include "llvm/IR/Instruction.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/MDBuilder.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/PseudoProbe.h"
67#include "llvm/Support/Debug.h"
71#include "llvm/Transforms/IPO.h"
82#include <algorithm>
83#include <cassert>
84#include <cstdint>
85#include <functional>
86#include <limits>
87#include <map>
88#include <memory>
89#include <queue>
90#include <string>
91#include <system_error>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96using namespace sampleprof;
97using namespace llvm::sampleprofutil;
99#define DEBUG_TYPE "sample-profile"
100#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
101
102STATISTIC(NumCSInlined,
103 "Number of functions inlined with context sensitive profile");
104STATISTIC(NumCSNotInlined,
105 "Number of functions not inlined with context sensitive profile");
106STATISTIC(NumMismatchedProfile,
107 "Number of functions with CFG mismatched profile");
108STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
109STATISTIC(NumDuplicatedInlinesite,
110 "Number of inlined callsites with a partial distribution factor");
111
112STATISTIC(NumCSInlinedHitMinLimit,
113 "Number of functions with FDO inline stopped due to min size limit");
114STATISTIC(NumCSInlinedHitMaxLimit,
115 "Number of functions with FDO inline stopped due to max size limit");
117 NumCSInlinedHitGrowthLimit,
118 "Number of functions with FDO inline stopped due to growth size limit");
119
// Command-line options controlling the sample profile loader.
//
// NOTE(review): in this listing most declarations below have lost their
// opening line (the `static cl::opt<...> Name(` declarator); only the
// argument lists survive. The option variable names cannot be recovered from
// the visible text and must be restored from the original file before this
// compiles. Only UseProfiledCallGraph is intact.
120// Command line option to specify the file to read samples from. This is
121// mainly used for debugging.
123 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
124 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
125
126// The named file contains a set of transformations that may have been applied
127// to the symbol names between the program from which the sample data was
128// collected and the current program's symbols.
130 "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
131 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
132
134 "salvage-stale-profile", cl::Hidden, cl::init(false),
135 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
136 "location for sample profile query."));
137
139 "report-profile-staleness", cl::Hidden, cl::init(false),
140 cl::desc("Compute and report stale profile statistical metrics."));
141
143 "persist-profile-staleness", cl::Hidden, cl::init(false),
144 cl::desc("Compute stale profile statistical metrics and write it into the "
145 "native object file(.llvm_stats section)."));
146
148 "profile-sample-accurate", cl::Hidden, cl::init(false),
149 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
150 "callsite and function as having 0 samples. Otherwise, treat "
151 "un-sampled callsites and functions conservatively as unknown. "));
152
154 "profile-sample-block-accurate", cl::Hidden, cl::init(false),
155 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
156 "branches and calls as having 0 samples. Otherwise, treat "
157 "them conservatively as unknown. "));
158
160 "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
161 cl::desc("For symbols in profile symbol list, regard their profiles to "
162 "be accurate. It may be overriden by profile-sample-accurate. "));
163
165 "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
166 cl::desc("Merge past inlinee's profile to outline version if sample "
167 "profile loader decided not to inline a call site. It will "
168 "only be enabled when top-down order of profile loading is "
169 "enabled. "));
170
172 "sample-profile-top-down-load", cl::Hidden, cl::init(true),
173 cl::desc("Do profile annotation and inlining for functions in top-down "
174 "order of call graph during sample profile loading. It only "
175 "works for new pass manager. "));
176
// Hidden boolean option, on by default; per its description it only matters
// when -sample-profile-top-down-load is also on.
177static cl::opt<bool>
178 UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
179 cl::desc("Process functions in a top-down order "
180 "defined by the profiled call graph when "
181 "-sample-profile-top-down-load is on."));
182
184 "sample-profile-inline-size", cl::Hidden, cl::init(false),
185 cl::desc("Inline cold call sites in profile loader if it's beneficial "
186 "for code size."));
187
188// Since profiles are consumed by many passes, turning on this option has
189// side effects. For instance, pre-link SCC inliner would see merged profiles
190// and inline the hot functions (that are skipped in this pass).
192 "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
193 cl::desc("If true, artifically skip inline transformation in sample-loader "
194 "pass, and merge (or scale) profiles (as configured by "
195 "--sample-profile-merge-inlinee)."));
196
// Options declared inside namespace llvm: SCC sorting plus the size-growth
// limits and hot/cold thresholds used by the sample loader inliner.
//
// NOTE(review): the declarator lines (option type and, for all but
// SortProfiledSCC, the variable name) are missing from this listing; only the
// argument lists are visible. Restore them from the original file.
197namespace llvm {
199 SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
200 cl::desc("Sort profiled recursion by edge weights."));
201
203 "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
204 cl::desc("The size growth ratio limit for proirity-based sample profile "
205 "loader inlining."));
206
208 "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
209 cl::desc("The lower bound of size growth limit for "
210 "proirity-based sample profile loader inlining."));
211
213 "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
214 cl::desc("The upper bound of size growth limit for "
215 "proirity-based sample profile loader inlining."));
216
218 "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
219 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
220 "inlining."));
221
223 "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
224 cl::desc("Threshold for inlining cold callsites"));
225} // namespace llvm
226
// Options for indirect-call promotion, staleness checks, prioritized
// inlining, inline replay and weight annotation.
//
// NOTE(review): the declarator line of every option in this group (and, for
// the enum-valued options, the `cl::values(` opener) is missing from this
// listing; only MaxNumPromotions still shows its variable name. The three
// options whose cl::init argument line is blank below also appear to have
// lost an argument - confirm against the original file.
228 "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
229 cl::desc(
230 "Relative hotness percentage threshold for indirect "
231 "call promotion in proirity-based sample profile loader inlining."));
232
234 "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
235 cl::desc(
236 "Skip relative hotness check for ICP up to given number of targets."));
237
239 "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
240 cl::desc("A function is considered hot for staleness error check if its "
241 "total sample count is above the specified percentile"));
242
244 "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
245 cl::desc("Skip the check if the number of hot functions is smaller than "
246 "the specified number."));
247
249 "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
250 cl::desc("Reject the profile if the mismatch percent is higher than the "
251 "given number."));
252
254 "sample-profile-prioritized-inline", cl::Hidden,
255
256 cl::desc("Use call site prioritized inlining for sample profile loader."
257 "Currently only CSSPGO is supported."));
258
260 "sample-profile-use-preinliner", cl::Hidden,
261
262 cl::desc("Use the preinliner decisions stored in profile context."));
263
265 "sample-profile-recursive-inline", cl::Hidden,
266
267 cl::desc("Allow sample loader inliner to inline recursive calls."));
268
270 "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
271 cl::desc(
272 "Optimization remarks file containing inline remarks to be replayed "
273 "by inlining from sample profile loader."),
274 cl::Hidden);
275
277 "sample-profile-inline-replay-scope",
278 cl::init(ReplayInlinerSettings::Scope::Function),
279 cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
280 "Replay on functions that have remarks associated "
281 "with them (default)"),
282 clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
283 "Replay on the entire module")),
284 cl::desc("Whether inline replay should be applied to the entire "
285 "Module or just the Functions (default) that are present as "
286 "callers in remarks during sample profile inlining."),
287 cl::Hidden);
288
290 "sample-profile-inline-replay-fallback",
291 cl::init(ReplayInlinerSettings::Fallback::Original),
294 ReplayInlinerSettings::Fallback::Original, "Original",
295 "All decisions not in replay send to original advisor (default)"),
296 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
297 "AlwaysInline", "All decisions not in replay are inlined"),
298 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
299 "All decisions not in replay are not inlined")),
300 cl::desc("How sample profile inline replay treats sites that don't come "
301 "from the replay. Original: defers to original advisor, "
302 "AlwaysInline: inline all sites not in replay, NeverInline: "
303 "inline no sites not in replay"),
304 cl::Hidden);
305
307 "sample-profile-inline-replay-format",
308 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
310 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
311 clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
312 "<Line Number>:<Column Number>"),
313 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
314 "LineDiscriminator", "<Line Number>.<Discriminator>"),
315 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
316 "LineColumnDiscriminator",
317 "<Line Number>:<Column Number>.<Discriminator> (default)")),
318 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
319
// Upper bound on promotions per indirect call site (default 3). The functions
// later in this file size their value-profile buffers with it.
321 MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
322 cl::desc("Max number of promotions for a single indirect "
323 "call callsite in sample profile loader"));
324
326 "overwrite-existing-weights", cl::Hidden, cl::init(false),
327 cl::desc("Ignore existing branch weights on IR and always overwrite."));
328
330 "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
331 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
332 "sample-profile inline pass name."));
333
// NOTE(review): the single declaration inside this namespace block is missing
// from the listing.
334namespace llvm {
336}
337
338namespace {
339
// Shorthand container types used throughout the loader.
// Basic block -> 64-bit value (a weight, going by the alias name).
340using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Basic block -> basic block (presumably the representative of its
// equivalence class - confirm against the users of this alias).
341using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// A CFG edge expressed as an ordered pair of basic blocks.
342using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Edge -> 64-bit value.
343using EdgeWeightMap = DenseMap<Edge, uint64_t>;
// NOTE(review): the right-hand side of this alias is missing from the
// listing. Later code indexes it by basic block and calls size()/back() on
// the mapped value, so it is presumably a map from a block to a sequence of
// blocks - restore the exact type from the original file.
344using BlockEdgeMap =
346
347class GUIDToFuncNameMapper {
348public:
349 GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
350 DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
351 : CurrentReader(Reader), CurrentModule(M),
352 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
353 if (!CurrentReader.useMD5())
354 return;
355
356 for (const auto &F : CurrentModule) {
357 StringRef OrigName = F.getName();
358 CurrentGUIDToFuncNameMap.insert(
359 {Function::getGUID(OrigName), OrigName});
360
361 // Local to global var promotion used by optimization like thinlto
362 // will rename the var and add suffix like ".llvm.xxx" to the
363 // original local name. In sample profile, the suffixes of function
364 // names are all stripped. Since it is possible that the mapper is
365 // built in post-thin-link phase and var promotion has been done,
366 // we need to add the substring of function name without the suffix
367 // into the GUIDToFuncNameMap.
369 if (CanonName != OrigName)
370 CurrentGUIDToFuncNameMap.insert(
371 {Function::getGUID(CanonName), CanonName});
372 }
373
374 // Update GUIDToFuncNameMap for each function including inlinees.
375 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
376 }
377
378 ~GUIDToFuncNameMapper() {
379 if (!CurrentReader.useMD5())
380 return;
381
382 CurrentGUIDToFuncNameMap.clear();
383
384 // Reset GUIDToFuncNameMap for of each function as they're no
385 // longer valid at this point.
386 SetGUIDToFuncNameMapForAll(nullptr);
387 }
388
389private:
390 void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
391 std::queue<FunctionSamples *> FSToUpdate;
392 for (auto &IFS : CurrentReader.getProfiles()) {
393 FSToUpdate.push(&IFS.second);
394 }
395
396 while (!FSToUpdate.empty()) {
397 FunctionSamples *FS = FSToUpdate.front();
398 FSToUpdate.pop();
399 FS->GUIDToFuncNameMap = Map;
400 for (const auto &ICS : FS->getCallsiteSamples()) {
401 const FunctionSamplesMap &FSMap = ICS.second;
402 for (const auto &IFS : FSMap) {
403 FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
404 FSToUpdate.push(&FS);
405 }
406 }
407 }
408 }
409
411 Module &CurrentModule;
412 DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
413};
414
415// Inline candidate used by iterative callsite prioritized inliner
416struct InlineCandidate {
417 CallBase *CallInstr;
418 const FunctionSamples *CalleeSamples;
419 // Prorated callsite count, which will be used to guide inlining. For example,
420 // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
421 // copies will get their own distribution factors and their prorated counts
422 // will be used to decide if they should be inlined independently.
423 uint64_t CallsiteCount;
424 // Call site distribution factor to prorate the profile samples for a
425 // duplicated callsite. Default value is 1.0.
426 float CallsiteDistribution;
427};
428
429// Inline candidate comparer using call site weight
430struct CandidateComparer {
431 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
432 if (LHS.CallsiteCount != RHS.CallsiteCount)
433 return LHS.CallsiteCount < RHS.CallsiteCount;
434
435 const FunctionSamples *LCS = LHS.CalleeSamples;
436 const FunctionSamples *RCS = RHS.CalleeSamples;
437 assert(LCS && RCS && "Expect non-null FunctionSamples");
438
439 // Tie breaker using number of samples try to favor smaller functions first
440 if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
441 return LCS->getBodySamples().size() > RCS->getBodySamples().size();
442
443 // Tie breaker using GUID so we have stable/deterministic inlining order
444 return LCS->getGUID() < RCS->getGUID();
445 }
446};
447
// Queue type for inline candidates, ordered by CandidateComparer.
// NOTE(review): the middle line of this alias (the queue template and its
// first template arguments) is missing from the listing; restore it from the
// original file.
448using CandidateQueue =
450 CandidateComparer>;
451
452/// Sample profile pass.
453///
454/// This pass reads profile data from the file specified by
455/// -sample-profile-file and annotates every affected function with the
456/// profile information found in that file.
///
/// NOTE(review): several declaration lines of this class are missing from
/// this listing and must be restored from the original file: a constructor
/// parameter (the initializer list moves from `FS`, which is not among the
/// visible parameters), the start of the base-class initializer, the true
/// branch of the AnnotatedPassName conditional, the tail of runOnModule's
/// parameter list, the first parameter of
/// promoteMergeNotInlinedContextSamples, and the member declarations that
/// should follow the "Map from function name" comment and the
/// NotInlinedProfileInfo struct. Numbering gaps suggest two further
/// declarations are missing at the top of the protected section.
457class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
458public:
// Stores the three analysis getters and the LTO phase. The remark pass name
// falls back to CSINLINE_DEBUG when AnnotateSampleProfileInlinePhase is off.
459 SampleProfileLoader(
460 StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
462 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
463 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
464 std::function<const TargetLibraryInfo &(Function &)> GetTLI)
466 std::move(FS)),
467 GetAC(std::move(GetAssumptionCache)),
468 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
469 LTOPhase(LTOPhase),
470 AnnotatedPassName(AnnotateSampleProfileInlinePhase
473 : CSINLINE_DEBUG) {}
474
475 bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
476 bool runOnModule(Module &M, ModuleAnalysisManager *AM,
478
479protected:
481 bool emitAnnotations(Function &F);
// Callee samples for a direct call site; may return null.
483 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
484 const FunctionSamples *
485 findFunctionSamples(const Instruction &I) const override;
// Candidate callee samples for an indirect call; the total call count is
// stored in Sum.
486 std::vector<const FunctionSamples *>
487 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
488 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
489 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
490 uint64_t Threshold);
491 // Attempt to promote indirect call and also inline the promoted call
492 bool tryPromoteAndInlineCandidate(
493 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
494 uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
495
496 bool inlineHotFunctions(Function &F,
497 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
498 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
499 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
500 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
501 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
502 bool
503 tryInlineCandidate(InlineCandidate &Candidate,
504 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
505 bool
506 inlineHotFunctionsWithPriority(Function &F,
507 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
508 // Inline cold/small functions in addition to hot ones
509 bool shouldInlineColdCallee(CallBase &CallInst);
510 void emitOptimizationRemarksForInlineCandidates(
511 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
512 bool Hot);
513 void promoteMergeNotInlinedContextSamples(
515 const Function &F);
516 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
517 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
518 void generateMDProfMetadata(Function &F);
519 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
520 const SampleProfileMap &Profiles);
521
522 /// Map from function name to Function *. Used to find the function from
523 /// the function name. If the function name contains suffix, additional
524 /// entry is added to map from the stripped name to the function if there
525 /// is one-to-one mapping.
527
// Per-function analysis getters supplied by the caller at construction.
528 std::function<AssumptionCache &(Function &)> GetAC;
529 std::function<TargetTransformInfo &(Function &)> GetTTI;
530 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
531
532 /// Profile tracker for different context.
533 std::unique_ptr<SampleContextTracker> ContextTracker;
534
535 /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
536 ///
537 /// We need to know the LTO phase because for example in ThinLTOPrelink
538 /// phase, in annotation, we should not promote indirect calls. Instead,
539 /// we will mark GUIDs that needs to be annotated to the function.
540 const ThinOrFullLTOPhase LTOPhase;
// Pass name used for remarks, fixed at construction.
541 const std::string AnnotatedPassName;
542
543 /// Profile symbol list tells whether a function name appears in the binary
544 /// used to generate the current profile.
545 std::unique_ptr<ProfileSymbolList> PSL;
546
547 /// Total number of samples collected in this profile.
548 ///
549 /// This is the sum of all the samples collected in all the functions executed
550 /// at runtime.
551 uint64_t TotalCollectedSamples = 0;
552
553 // Information recorded when we declined to inline a call site
554 // because we have determined it is too cold is accumulated for
555 // each callee function. Initially this is just the entry count.
556 struct NotInlinedProfileInfo {
557 uint64_t entryCount;
558 };
560
561 // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
562 // all the function symbols defined or declared in current module.
563 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
564
565 // All the Names used in FunctionSamples including outline function
566 // names, inline instance names and call target names.
567 StringSet<> NamesInProfile;
568 // MD5 version of NamesInProfile. Either NamesInProfile or GUIDsInProfile is
569 // populated, depends on whether the profile uses MD5. Because the name table
570 // generally contains several magnitude more entries than the number of
571 // functions, we do not want to convert all names from one form to another.
572 llvm::DenseSet<uint64_t> GUIDsInProfile;
573
574 // For symbol in profile symbol list, whether to regard their profiles
575 // to be accurate. It is mainly decided by existence of profile symbol
576 // list and -profile-accurate-for-symsinlist flag, but it can be
577 // overridden by -profile-sample-accurate or profile-sample-accurate
578 // attribute.
579 bool ProfAccForSymsInList;
580
581 // External inline advisor used to replay inline decision from remarks.
582 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
583
584 // A helper to implement the sample profile matching algorithm.
585 std::unique_ptr<SampleProfileMatcher> MatchingManager;
586
587private:
// Returns AnnotatedPassName as a C string; the pointer stays valid for the
// lifetime of this object because the member is const.
588 const char *getAnnotatedRemarkPassName() const {
589 return AnnotatedPassName.c_str();
590 }
591};
592} // end anonymous namespace
593
594namespace llvm {
// Explicit specialization whose body returns succ_empty(BB) for the given
// block.
// NOTE(review): the declarator line (return type, qualified name and the BB
// parameter) is missing from this listing; restore it from the original file.
595template <>
597 return succ_empty(BB);
598}
599
// Explicit specialization that walks every jump in Func.Jumps and sets
// Jump.IsUnlikely in two situations:
//   1. the source block has exactly two recorded successors, the jump
//      targets the last of them, and the source block ends in an invoke;
//   2. the target block's terminator has no successors and is an
//      unreachable instruction.
// Jump.Source and Jump.Target are used as indices into BasicBlocks.
// NOTE(review): the declarator line (return type and qualified function
// name) is missing from this listing; restore it from the original file.
600template <>
602 const std::vector<const BasicBlockT *> &BasicBlocks,
603 BlockEdgeMap &Successors, FlowFunction &Func) {
604 for (auto &Jump : Func.Jumps) {
605 const auto *BB = BasicBlocks[Jump.Source];
606 const auto *Succ = BasicBlocks[Jump.Target];
607 const Instruction *TI = BB->getTerminator();
608 // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
609 // In that case block Succ should be a landing pad
610 if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
611 if (isa<InvokeInst>(TI)) {
612 Jump.IsUnlikely = true;
613 }
614 }
615 const Instruction *SuccTI = Succ->getTerminator();
616 // Check if the target block contains UnreachableInst and mark it unlikely
617 if (SuccTI->getNumSuccessors() == 0) {
618 if (isa<UnreachableInst>(SuccTI)) {
619 Jump.IsUnlikely = true;
620 }
621 }
622 }
623}
624
// Explicit specialization that rebuilds the three analyses held in DT, PDT
// and LI for function F: a freshly recalculated dominator tree, a new
// post-dominator tree, and loop info analysed from that dominator tree.
// NOTE(review): the declarator line (return type and qualified function
// name) is missing from this listing; restore it from the original file.
625template <>
627 Function &F) {
628 DT.reset(new DominatorTree);
629 DT->recalculate(F);
630
631 PDT.reset(new PostDominatorTree(F));
632
// Loop info is derived from the dominator tree, so DT must be ready first.
633 LI.reset(new LoopInfo);
634 LI->analyze(*DT);
635}
636} // namespace llvm
637
638ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
640 return getProbeWeight(Inst);
641
642 const DebugLoc &DLoc = Inst.getDebugLoc();
643 if (!DLoc)
644 return std::error_code();
645
646 // Ignore all intrinsics, phinodes and branch instructions.
647 // Branch and phinodes instruction usually contains debug info from sources
648 // outside of the residing basic block, thus we ignore them during annotation.
649 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
650 return std::error_code();
651
652 // For non-CS profile, if a direct call/invoke instruction is inlined in
653 // profile (findCalleeFunctionSamples returns non-empty result), but not
654 // inlined here, it means that the inlined callsite has no sample, thus the
655 // call instruction should have 0 count.
656 // For CS profile, the callsite count of previously inlined callees is
657 // populated with the entry count of the callees.
659 if (const auto *CB = dyn_cast<CallBase>(&Inst))
660 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
661 return 0;
662
663 return getInstWeightImpl(Inst);
664}
665
666/// Get the FunctionSamples for a call instruction.
667///
668/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
669/// instance in which that call instruction is calling to. It contains
670/// all samples that resides in the inlined instance. We first find the
671/// inlined instance in which the call instruction is from, then we
672/// traverse its children to find the callsite with the matching
673/// location.
674///
675/// \param Inst Call/Invoke instruction to query.
676///
677/// \returns The FunctionSamples pointer to the inlined instance.
678const FunctionSamples *
679SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
680 const DILocation *DIL = Inst.getDebugLoc();
681 if (!DIL) {
682 return nullptr;
683 }
684
685 StringRef CalleeName;
686 if (Function *Callee = Inst.getCalledFunction())
687 CalleeName = Callee->getName();
688
690 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
691
692 const FunctionSamples *FS = findFunctionSamples(Inst);
693 if (FS == nullptr)
694 return nullptr;
695
696 return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
697 CalleeName, Reader->getRemapper());
698}
699
700/// Returns a vector of FunctionSamples that are the indirect call targets
701/// of \p Inst. The vector is sorted by the total number of samples. Stores
702/// the total call count of the indirect call in \p Sum.
703std::vector<const FunctionSamples *>
704SampleProfileLoader::findIndirectCallFunctionSamples(
705 const Instruction &Inst, uint64_t &Sum) const {
706 const DILocation *DIL = Inst.getDebugLoc();
707 std::vector<const FunctionSamples *> R;
708
709 if (!DIL) {
710 return R;
711 }
712
713 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
714 assert(L && R && "Expect non-null FunctionSamples");
715 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
716 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
717 return L->getGUID() < R->getGUID();
718 };
719
721 auto CalleeSamples =
722 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
723 if (CalleeSamples.empty())
724 return R;
725
726 // For CSSPGO, we only use target context profile's entry count
727 // as that already includes both inlined callee and non-inlined ones..
728 Sum = 0;
729 for (const auto *const FS : CalleeSamples) {
730 Sum += FS->getHeadSamplesEstimate();
731 R.push_back(FS);
732 }
733 llvm::sort(R, FSCompare);
734 return R;
735 }
736
737 const FunctionSamples *FS = findFunctionSamples(Inst);
738 if (FS == nullptr)
739 return R;
740
742 Sum = 0;
743 if (auto T = FS->findCallTargetMapAt(CallSite))
744 for (const auto &T_C : *T)
745 Sum += T_C.second;
746 if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
747 if (M->empty())
748 return R;
749 for (const auto &NameFS : *M) {
750 Sum += NameFS.second.getHeadSamplesEstimate();
751 R.push_back(&NameFS.second);
752 }
753 llvm::sort(R, FSCompare);
754 }
755 return R;
756}
757
758const FunctionSamples *
759SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
761 std::optional<PseudoProbe> Probe = extractProbe(Inst);
762 if (!Probe)
763 return nullptr;
764 }
765
766 const DILocation *DIL = Inst.getDebugLoc();
767 if (!DIL)
768 return Samples;
769
770 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
771 if (it.second) {
773 it.first->second = ContextTracker->getContextSamplesFor(DIL);
774 else
775 it.first->second =
776 Samples->findFunctionSamples(DIL, Reader->getRemapper());
777 }
778 return it.first->second;
779}
780
781/// Check whether the indirect call promotion history of \p Inst allows
782/// the promotion for \p Candidate.
783/// If the profile count for the promotion candidate \p Candidate is
784/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
785/// for \p Inst. If we already have at least MaxNumPromotions
786/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
787/// cannot promote for \p Inst anymore.
788static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
789 uint32_t NumVals = 0;
790 uint64_t TotalCount = 0;
791 std::unique_ptr<InstrProfValueData[]> ValueData =
792 std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
793 bool Valid =
794 getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
795 ValueData.get(), NumVals, TotalCount, true);
796 // No valid value profile so no promoted targets have been recorded
797 // before. Ok to do ICP.
798 if (!Valid)
799 return true;
800
801 unsigned NumPromoted = 0;
802 for (uint32_t I = 0; I < NumVals; I++) {
803 if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
804 continue;
805
806 // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
807 // metadata, it means the candidate has been promoted for this
808 // indirect call.
809 if (ValueData[I].Value == Function::getGUID(Candidate))
810 return false;
811 NumPromoted++;
812 // If already have MaxNumPromotions promotion, don't do it anymore.
813 if (NumPromoted == MaxNumPromotions)
814 return false;
815 }
816 return true;
817}
818
819/// Update indirect call target profile metadata for \p Inst.
820/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
821/// If it is 0, it means updateIDTMetaData is used to mark a
822/// certain target to be promoted already. If it is not zero,
823/// we expect to use it to update the total count in the value profile.
824static void
826 const SmallVectorImpl<InstrProfValueData> &CallTargets,
827 uint64_t Sum) {
828 // Bail out early if MaxNumPromotions is zero.
829 // This prevents allocating an array of zero length below.
830 //
831 // Note `updateIDTMetaData` is called in two places so check
832 // `MaxNumPromotions` inside it.
833 if (MaxNumPromotions == 0)
834 return;
835 uint32_t NumVals = 0;
836 // OldSum is the existing total count in the value profile data.
837 uint64_t OldSum = 0;
838 std::unique_ptr<InstrProfValueData[]> ValueData =
839 std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
840 bool Valid =
841 getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
842 ValueData.get(), NumVals, OldSum, true);
843
844 DenseMap<uint64_t, uint64_t> ValueCountMap;
845 if (Sum == 0) {
846 assert((CallTargets.size() == 1 &&
847 CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
848 "If sum is 0, assume only one element in CallTargets "
849 "with count being NOMORE_ICP_MAGICNUM");
850 // Initialize ValueCountMap with existing value profile data.
851 if (Valid) {
852 for (uint32_t I = 0; I < NumVals; I++)
853 ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
854 }
855 auto Pair =
856 ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
857 // If the target already exists in value profile, decrease the total
858 // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
859 if (!Pair.second) {
860 OldSum -= Pair.first->second;
861 Pair.first->second = NOMORE_ICP_MAGICNUM;
862 }
863 Sum = OldSum;
864 } else {
865 // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
866 // counts in the value profile.
867 if (Valid) {
868 for (uint32_t I = 0; I < NumVals; I++) {
869 if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
870 ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
871 }
872 }
873
874 for (const auto &Data : CallTargets) {
875 auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
876 if (Pair.second)
877 continue;
878 // The target represented by Data.Value has already been promoted.
879 // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
880 // Sum by Data.Count.
881 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
882 Sum -= Data.Count;
883 }
884 }
885
887 for (const auto &ValueCount : ValueCountMap) {
888 NewCallTargets.emplace_back(
889 InstrProfValueData{ValueCount.first, ValueCount.second});
890 }
891
892 llvm::sort(NewCallTargets,
893 [](const InstrProfValueData &L, const InstrProfValueData &R) {
894 if (L.Count != R.Count)
895 return L.Count > R.Count;
896 return L.Value > R.Value;
897 });
898
899 uint32_t MaxMDCount =
900 std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
902 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
903}
904
905/// Attempt to promote an indirect call to a direct call and, when the
/// promotion yields a call or invoke, try to inline the promoted call.
906///
907/// \param F Caller function.
908/// \param Candidate ICP and inline candidate.
/// Once the call is promoted, its CallInstr is redirected to the promoted
/// direct call.
909/// \param SumOrigin Original sum of target counts for indirect call before
910/// promoting given candidate.
911/// \param Sum Prorated sum of remaining target counts for indirect call
912/// after promoting given candidate.
/// Decreased by Candidate.CallsiteCount once the call is promoted.
913/// \param InlinedCallSite Output vector for new call sites exposed after
914/// inlining.
/// \returns the result of tryInlineCandidate for the promoted call; false on
/// every path that does not reach the inlining attempt.
915bool SampleProfileLoader::tryPromoteAndInlineCandidate(
916 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
917 SmallVector<CallBase *, 8> *InlinedCallSite) {
918 // Bail out early if sample-loader inliner is disabled.
 // NOTE(review): the condition guarding the next return (listing line 919) is
 // not visible in this listing.
920 return false;
921
922 // Bail out early if MaxNumPromotions is zero.
923 // This prevents allocating an array of zero length in callees below.
924 if (MaxNumPromotions == 0)
925 return false;
 // Map the profiled callee name to a function through SymbolMap; give up when
 // the name is unknown or maps to a null function.
926 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
927 auto R = SymbolMap.find(CalleeFunctionName);
928 if (R == SymbolMap.end() || !R->second)
929 return false;
930
 // Skip targets that doesHistoryAllowICP rejects for this call site.
931 auto &CI = *Candidate.CallInstr;
932 if (!doesHistoryAllowICP(CI, R->second->getName()))
933 return false;
934
 // Default failure reason for the debug message below; isLegalToPromote
 // receives a pointer to it and may replace it.
935 const char *Reason = "Callee function not available";
936 // The R->second != &F check is to prevent promoting a recursive call.
937 // If it is a recursive call, we do not inline it as it could bloat
938 // the code exponentially. There are better ways to handle this, e.g.
939 // clone the caller first, and inline the cloned caller if it is
940 // recursive. As llvm does not inline recursive calls, we will
941 // simply ignore it instead of handling it explicitly.
942 if (!R->second->isDeclaration() && R->second->getSubprogram() &&
943 R->second->hasFnAttribute("use-sample-profile") &&
944 R->second != &F && isLegalToPromote(CI, R->second, &Reason)) {
945 // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
946 // in the value profile metadata so the target won't be promoted again.
947 SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
948 Function::getGUID(R->second->getName()), NOMORE_ICP_MAGICNUM}};
949 updateIDTMetaData(CI, SortedCallTargets, 0);
950
951 auto *DI = &pgo::promoteIndirectCall(
952 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
 // NOTE(review): DI is the address of the reference returned by
 // promoteIndirectCall, so this test cannot fail.
953 if (DI) {
954 Sum -= Candidate.CallsiteCount;
955 // Do not prorate the indirect callsite distribution since the original
956 // distribution will be used to scale down non-promoted profile target
957 // counts later. By doing this we lose track of the real callsite count
958 // for the leftover indirect callsite as a trade off for accurate call
959 // target counts.
960 // TODO: Ideally we would have two separate factors, one for call site
961 // counts and one is used to prorate call target counts.
962 // Do not update the promoted direct callsite distribution at this
963 // point since the original distribution combined with the callee profile
964 // will be used to prorate callsites from the callee if inlined. Once not
965 // inlined, the direct callsite distribution should be prorated so that
966 // it reflects the real callsite counts.
967 Candidate.CallInstr = DI;
968 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
969 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
970 if (!Inlined) {
971 // Prorate the direct callsite distribution so that it reflects real
972 // callsite counts.
 // NOTE(review): the name of the call that takes the arguments below
 // (listing line 973) is not visible in this listing. The factor passed is
 // this candidate's share of the original target-count sum.
974 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
975 }
976 return Inlined;
977 }
978 }
979 } else {
 // Promotion was rejected; report Reason in debug output only.
 // NOTE(review): listing line 981 (part of this stream expression) is not
 // visible in this listing.
980 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
982 Candidate.CallInstr->getName())<< " because "
983 << Reason << "\n");
984 }
985 return false;
986}
987
/// Decide whether the call site \p CallInst should still be inlined even
/// though it is cold.
///
/// A "never" cost rejects the call site, an "always" cost accepts it, and any
/// other cost is accepted when it does not exceed SampleColdCallSiteThreshold.
///
/// NOTE(review): listing lines 989, 992 and 996 (the first guard, the
/// declaration of Callee and the start of the Cost computation) are not
/// visible in this listing.
988bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
990 return false;
991
 // Without a known callee there is nothing to inline.
993 if (Callee == nullptr)
994 return false;
995
997 GetAC, GetTLI);
998
999 if (Cost.isNever())
1000 return false;
1001
1002 if (Cost.isAlways())
1003 return true;
1004
 // Otherwise compare the numeric cost against the cold call site threshold.
1005 return Cost.getCost() <= SampleColdCallSiteThreshold;
1006}
1007
1008void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1009 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1010 bool Hot) {
1011 for (auto *I : Candidates) {
1012 Function *CalledFunction = I->getCalledFunction();
1013 if (CalledFunction) {
1014 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1015 "InlineAttempt", I->getDebugLoc(),
1016 I->getParent())
1017 << "previous inlining reattempted for "
1018 << (Hot ? "hotness: '" : "size: '")
1019 << ore::NV("Callee", CalledFunction) << "' into '"
1020 << ore::NV("Caller", &F) << "'");
1021 }
1022 }
1023}
1024
/// Collect into \p InlinedGUIDs the GUIDs of functions that should be imported
/// for the call site \p CB, based on its callee profile \p Samples.
///
/// \param CB the call site; may be null, in which case the external advisor is
/// not consulted.
/// \param Samples callee profile for the call site; nothing is collected from
/// the profile when it is null.
/// \param InlinedGUIDs output set of GUIDs.
/// \param Threshold sample count cut-off; it is reset to 0 when the external
/// advisor wants to inline \p CB.
1025void SampleProfileLoader::findExternalInlineCandidate(
1026 CallBase *CB, const FunctionSamples *Samples,
1027 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1028
1029 // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
1030 // function make sure it's imported
1031 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1032 // Samples may not exist for replayed function, if so
1033 // just add the direct GUID and move on
 // NOTE(review): getCalledFunction() is dereferenced without a null check;
 // presumably the advisor only approves call sites with a known callee -
 // confirm.
1034 if (!Samples) {
1035 InlinedGUIDs.insert(
1036 Function::getGUID(CB->getCalledFunction()->getName()));
1037 return;
1038 }
1039 // Otherwise, drop the threshold to import everything that we can
1040 Threshold = 0;
1041 }
1042
1043 // In some rare cases, call instruction could be changed after being pushed
1044 // into inline candidate queue, this is because earlier inlining may expose
1045 // constant propagation which can change indirect call to direct call. When
1046 // this happens, we may fail to find matching function samples for the
1047 // candidate later, even if a match was found when the candidate was enqueued.
1048 if (!Samples)
1049 return;
1050
1051 // For AutoFDO profile, retrieve candidate profiles by walking over
1052 // the nested inlinee profiles.
 // NOTE(review): listing lines 1053 and 1055 (the conditions that open this
 // branch and guard the threshold reset) are not visible in this listing.
1054 // Set threshold to zero to honor pre-inliner decision.
1056 Threshold = 0;
1057 Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1058 return;
1059 }
1060
 // Breadth-first walk over the context trie, starting at the node that holds
 // Samples.
1061 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1062 std::queue<ContextTrieNode *> CalleeList;
1063 CalleeList.push(Caller);
1064 while (!CalleeList.empty()) {
1065 ContextTrieNode *Node = CalleeList.front();
1066 CalleeList.pop();
1067 FunctionSamples *CalleeSample = Node->getFunctionSamples();
1068 // For CSSPGO profile, retrieve candidate profile by walking over the
1069 // trie built for context profile. Note that also take call targets
1070 // even if callee doesn't have a corresponding context profile.
1071 if (!CalleeSample)
1072 continue;
1073
1074 // If pre-inliner decision is used, honor that for importing as well.
 // NOTE(review): the initializer of PreInline (listing lines 1076-1077) is
 // not visible in this listing.
1075 bool PreInline =
1078 if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1079 continue;
1080
1081 Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
1082 // Add to the import list only when it's defined out of module.
1083 if (!Func || Func->isDeclaration())
1084 InlinedGUIDs.insert(CalleeSample->getGUID());
1085
1086 // Import hot CallTargets, which may not be available in IR because full
1087 // profile annotation cannot be done until backend compilation in ThinLTO.
1088 for (const auto &BS : CalleeSample->getBodySamples())
1089 for (const auto &TS : BS.second.getCallTargets())
1090 if (TS.second > Threshold) {
1091 const Function *Callee = SymbolMap.lookup(TS.first);
1092 if (!Callee || Callee->isDeclaration())
1093 InlinedGUIDs.insert(TS.first.getHashCode());
1094 }
1095
1096 // Import hot child context profile associated with callees. Note that this
1097 // may have some overlap with the call target loop above, but doing this
1098 // based on child context profile again effectively allows us to use the max
1099 // of entry count and call target count to determine importing.
1100 for (auto &Child : Node->getAllChildContext()) {
1101 ContextTrieNode *CalleeNode = &Child.second;
1102 CalleeList.push(CalleeNode);
1103 }
1104 }
1105}
1106
1107/// Iteratively inline hot callsites of a function.
1108///
1109/// Iteratively traverse all callsites of the function \p F, so as to
1110/// find out callsites with corresponding inline instances.
1111///
1112/// For such callsites,
1113/// - If it is hot enough, inline the callsites and adds callsites of the callee
1114/// into the caller. If the call is an indirect call, first promote
1115/// it to direct call. Each indirect call is limited with a single target.
1116///
1117/// - If a callsite is not inlined, merge its profile into the outlined
1118/// version (if --sample-profile-merge-inlinee is true), or scale the
1119/// counters of standalone function based on the profile of inlined
1120/// instances (if --sample-profile-merge-inlinee is false).
1121///
1122/// Later passes may consume the updated profiles.
1123///
1124/// \param F function to perform iterative inlining.
1125/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1126/// inlined in the profiled binary.
1127///
1128/// \returns True if at least one callsite was inlined.
1129bool SampleProfileLoader::inlineHotFunctions(
1130 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1131 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1132 // Profile symbol list is ignored when profile-sample-accurate is on.
 // NOTE(review): listing line 1134 (part of the asserted condition) is not
 // visible in this listing.
1133 assert((!ProfAccForSymsInList ||
1135 !F.hasFnAttribute("profile-sample-accurate"))) &&
1136 "ProfAccForSymsInList should be false when profile-sample-accurate "
1137 "is enabled");
1138
 // Call sites that had a callee profile but have not been inlined (so far);
 // handed to promoteMergeNotInlinedContextSamples at the end.
1139 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1140 bool Changed = false;
1141 bool LocalChanged = true;
 // Repeat until a full sweep over F inlines nothing.
1142 while (LocalChanged) {
1143 LocalChanged = false;
 // NOTE(review): the declaration of CIS, the per-sweep candidate list
 // (listing line 1144), is not visible in this listing.
1145 for (auto &BB : F) {
1146 bool Hot = false;
1147 SmallVector<CallBase *, 10> AllCandidates;
1148 SmallVector<CallBase *, 10> ColdCandidates;
1149 for (auto &I : BB) {
1150 const FunctionSamples *FS = nullptr;
1151 if (auto *CB = dyn_cast<CallBase>(&I)) {
1152 if (!isa<IntrinsicInst>(I)) {
1153 if ((FS = findCalleeFunctionSamples(*CB))) {
1154 assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1155 "GUIDToFuncNameMap has to be populated");
1156 AllCandidates.push_back(CB);
 // NOTE(review): the second operand of this condition (listing line 1158)
 // is not visible in this listing.
1157 if (FS->getHeadSamplesEstimate() > 0 ||
1159 LocalNotInlinedCallSites.insert({CB, FS});
1160 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1161 Hot = true;
1162 else if (shouldInlineColdCallee(*CB))
1163 ColdCandidates.push_back(CB);
1164 } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1165 AllCandidates.push_back(CB);
1166 }
1167 }
1168 }
1169 }
 // One hot call site (or an external advisor) makes every profiled call
 // site of the block a candidate; otherwise only the cold call sites that
 // passed shouldInlineColdCallee are.
1170 if (Hot || ExternalInlineAdvisor) {
1171 CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1172 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1173 } else {
1174 CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1175 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1176 }
1177 }
1178 for (CallBase *I : CIS) {
1179 Function *CalledFunction = I->getCalledFunction();
1180 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1181 0 /* dummy count */,
1182 1.0 /* dummy distribution factor */};
1183 // Do not inline recursive calls.
1184 if (CalledFunction == &F)
1185 continue;
1186 if (I->isIndirectCall()) {
 // NOTE(review): Sum is not initialized here; presumably
 // findIndirectCallFunctionSamples sets it - confirm.
1187 uint64_t Sum;
1188 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1189 uint64_t SumOrigin = Sum;
 // In the ThinLTO pre-link phase only record import candidates; do not
 // promote or inline.
1190 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1191 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1192 PSI->getOrCompHotCountThreshold());
1193 continue;
1194 }
1195 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1196 continue;
1197
1198 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1199 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1200 LocalNotInlinedCallSites.erase(I);
1201 LocalChanged = true;
1202 }
1203 }
1204 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1205 !CalledFunction->isDeclaration()) {
 // Direct call to a function that is defined in this module and has a
 // subprogram: try to inline it directly.
1206 if (tryInlineCandidate(Candidate)) {
1207 LocalNotInlinedCallSites.erase(I);
1208 LocalChanged = true;
1209 }
1210 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1211 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1212 InlinedGUIDs,
1213 PSI->getOrCompHotCountThreshold());
1214 }
1215 }
1216 Changed |= LocalChanged;
1217 }
1218
1219 // For CS profile, profile for not inlined context will be merged when
1220 // base profile is being retrieved.
 // NOTE(review): the condition guarding the next call (listing line 1221) is
 // not visible in this listing.
1222 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1223 return Changed;
1224}
1225
/// Try to inline the direct call held by \p Candidate.
///
/// \param Candidate the call site to inline together with its callee profile,
/// count and distribution factor.
/// \param InlinedCallSites if non-null, cleared and then filled with the call
/// sites that inlining exposed; left untouched when inlining does not happen.
/// \returns true if the call was inlined.
1226bool SampleProfileLoader::tryInlineCandidate(
1227 InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1228 // Do not attempt to inline a candidate if
1229 // --disable-sample-loader-inlining is true.
 // NOTE(review): the condition guarding the next return (listing line 1230) is
 // not visible in this listing.
1231 return false;
1232
 // Capture the debug location and parent block now; they are needed for the
 // remarks emitted after InlineFunction has run.
1233 CallBase &CB = *Candidate.CallInstr;
1234 Function *CalledFunction = CB.getCalledFunction();
1235 assert(CalledFunction && "Expect a callee with definition");
1236 DebugLoc DLoc = CB.getDebugLoc();
1237 BasicBlock *BB = CB.getParent();
1238
1239 InlineCost Cost = shouldInlineCandidate(Candidate);
1240 if (Cost.isNever()) {
1241 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1242 "InlineFail", DLoc, BB)
1243 << "incompatible inlining");
1244 return false;
1245 }
1246
 // A cost that converts to false means "do not inline"; no remark is emitted
 // for this case.
1247 if (!Cost)
1248 return false;
1249
1250 InlineFunctionInfo IFI(GetAC);
1251 IFI.UpdateProfile = false;
1252 InlineResult IR = InlineFunction(CB, IFI,
1253 /*MergeAttributes=*/true);
1254 if (!IR.isSuccess())
1255 return false;
1256
1257 // The call to InlineFunction erases the call instruction, so we can't pass
 // it here; use the DLoc and BB captured above instead.
1258 emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1259 Cost, true, getAnnotatedRemarkPassName());
1260
1261 // Now populate the list of newly exposed call sites.
1262 if (InlinedCallSites) {
1263 InlinedCallSites->clear();
1264 for (auto &I : IFI.InlinedCallSites)
1265 InlinedCallSites->push_back(I);
1266 }
1267
 // NOTE(review): the condition guarding the next statement (listing line
 // 1268) is not visible in this listing.
1269 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1270 ++NumCSInlined;
1271
1272 // Prorate inlined probes for a duplicated inlining callsite which probably
1273 // has a distribution less than 100%. Samples for an inlinee should be
1274 // distributed among the copies of the original callsite based on each
1275 // callsite's distribution factor for counts accuracy. Note that an inlined
1276 // probe may come with its own distribution factor if it has been duplicated
1277 // in the inlinee body. The two factors are multiplied to reflect the
1278 // aggregation of duplication.
1279 if (Candidate.CallsiteDistribution < 1) {
1280 for (auto &I : IFI.InlinedCallSites) {
1281 if (std::optional<PseudoProbe> Probe = extractProbe(*I))
1282 setProbeDistributionFactor(*I, Probe->Factor *
1283 Candidate.CallsiteDistribution);
1284 }
1285 NumDuplicatedInlinesite++;
1286 }
1287
1288 return true;
1289}
1290
1291bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1292 CallBase *CB) {
1293 assert(CB && "Expect non-null call instruction");
1294
1295 if (isa<IntrinsicInst>(CB))
1296 return false;
1297
1298 // Find the callee's profile. For indirect call, find hottest target profile.
1299 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1300 // If ExternalInlineAdvisor wants to inline this site, do so even
1301 // if Samples are not present.
1302 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1303 return false;
1304
1305 float Factor = 1.0;
1306 if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
1307 Factor = Probe->Factor;
1308
1309 uint64_t CallsiteCount =
1310 CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1311 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1312 return true;
1313}
1314
1315std::optional<InlineCost>
1316SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1317 std::unique_ptr<InlineAdvice> Advice = nullptr;
1318 if (ExternalInlineAdvisor) {
1319 Advice = ExternalInlineAdvisor->getAdvice(CB);
1320 if (Advice) {
1321 if (!Advice->isInliningRecommended()) {
1322 Advice->recordUnattemptedInlining();
1323 return InlineCost::getNever("not previously inlined");
1324 }
1325 Advice->recordInlining();
1326 return InlineCost::getAlways("previously inlined");
1327 }
1328 }
1329
1330 return {};
1331}
1332
1333bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1334 std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1335 return Cost ? !!*Cost : false;
1336}
1337
// Compute the inline cost used to decide whether Candidate is inlined.
//
// The external inline advisor's verdict, when it has one, is returned as is.
// Otherwise the cost comes from getInlineCost: "never" and "always" results
// are returned unchanged, a pre-inliner decision recorded in the callee
// context yields "always", and any other result is re-wrapped with a sample
// profile threshold.
//
// NOTE(review): the return type line (listing line 1338) is not visible in
// this listing; the return statements below all produce an InlineCost.
1339SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1340 if (std::optional<InlineCost> ReplayCost =
1341 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1342 return *ReplayCost;
1343 // Adjust threshold based on call site hotness, only do this for callsite
1344 // prioritized inliner because otherwise cost-benefit check is done earlier.
 // NOTE(review): the condition that opens the block below (listing line 1346)
 // is not visible in this listing.
1345 int SampleThreshold = SampleColdCallSiteThreshold;
1347 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1348 SampleThreshold = SampleHotCallSiteThreshold;
1349 else if (!ProfileSizeInline)
1350 return InlineCost::getNever("cold callsite");
1351 }
1352
1353 Function *Callee = Candidate.CallInstr->getCalledFunction();
1354 assert(Callee && "Expect a definition for inline candidate of direct call");
1355
1356 InlineParams Params = getInlineParams();
1357 // We will ignore the threshold from inline cost, so always get full cost.
1358 Params.ComputeFullInlineCost = true;
 // NOTE(review): listing line 1359 is not visible in this listing.
1360 // Checks if there is anything in the reachable portion of the callee at
1361 // this callsite that makes this inlining potentially illegal. Need to
1362 // set ComputeFullInlineCost, otherwise getInlineCost may return early
1363 // when cost exceeds threshold without checking all IRs in the callee.
1364 // The actual cost does not matter because we only check isNever() to
1365 // see if it is legal to inline the callsite.
1366 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1367 GetTTI(*Callee), GetAC, GetTLI);
1368
1369 // Honor always inline and never inline from call analyzer
1370 if (Cost.isNever() || Cost.isAlways())
1371 return Cost;
1372
1373 // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1374 // decisions based on hotness as well as accurate function byte sizes for
1375 // given context using function/inlinee sizes from previous build. It
1376 // stores the decision in profile, and also adjust/merge context profile
1377 // aiming at better context-sensitive post-inline profile quality, assuming
1378 // all inline decision estimates are going to be honored by compiler. Here
1379 // we replay that inline decision under `sample-profile-use-preinliner`.
1380 // Note that we don't need to handle negative decision from preinliner as
1381 // context profile for not inlined calls are merged by preinliner already.
1382 if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1383 // Once two nodes are merged due to promotion, we're losing some context
1384 // so the original context-sensitive preinliner decision should be ignored
1385 // for SyntheticContext.
1386 SampleContext &Context = Candidate.CalleeSamples->getContext();
1387 if (!Context.hasState(SyntheticContext) &&
1388 Context.hasAttribute(ContextShouldBeInlined))
1389 return InlineCost::getAlways("preinliner");
1390 }
1391
1392 // For old FDO inliner, we inline the call site as long as cost is not
1393 // "Never". The cost-benefit check is done earlier.
 // NOTE(review): the condition that opens the block below (listing line 1394)
 // is not visible in this listing.
1395 return InlineCost::get(Cost.getCost(), INT_MAX);
1396 }
1397
1398 // Otherwise only use the cost from call analyzer, but overwrite threshold
1399 // with Sample PGO threshold.
1400 return InlineCost::get(Cost.getCost(), SampleThreshold);
1401}
1402
/// Inline call sites of \p F in priority order, bounded by a size limit.
///
/// Every call site of \p F that getInlineCandidate accepts is put into a
/// candidate queue. Candidates are taken from the top of the queue; call sites
/// exposed by a successful inline are queued in turn. Processing stops when
/// the queue is empty or the instruction count of \p F reaches the size limit.
///
/// \param F function to perform inlining in.
/// \param InlinedGUIDs passed to findExternalInlineCandidate, which adds GUIDs
/// to it, in the ThinLTO pre-link phase.
///
/// \returns True if at least one candidate was inlined.
1403bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1404 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1405 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1406 // Profile symbol list is ignored when profile-sample-accurate is on.
 // NOTE(review): listing line 1408 (part of the asserted condition) is not
 // visible in this listing.
1407 assert((!ProfAccForSymsInList ||
1409 !F.hasFnAttribute("profile-sample-accurate"))) &&
1410 "ProfAccForSymsInList should be false when profile-sample-accurate "
1411 "is enabled");
1412
1413 // Populating worklist with initial call sites from root inliner, along
1414 // with call site weights.
1415 CandidateQueue CQueue;
1416 InlineCandidate NewCandidate;
1417 for (auto &BB : F) {
1418 for (auto &I : BB) {
1419 auto *CB = dyn_cast<CallBase>(&I);
1420 if (!CB)
1421 continue;
1422 if (getInlineCandidate(&NewCandidate, CB))
1423 CQueue.push(NewCandidate);
1424 }
1425 }
1426
1427 // Cap the size growth from profile guided inlining. This is needed even
1428 // though cost of each inline candidate already accounts for callee size,
1429 // because with top-down inlining, we can grow inliner size significantly
1430 // with large number of smaller inlinees each pass the cost check.
 // NOTE(review): the first line of the assertion below (listing line 1431) is
 // not visible in this listing.
1432 "Max inline size limit should not be smaller than min inline size "
1433 "limit.");
 // The limit is the current size times the growth factor, clamped to the
 // [ProfileInlineLimitMin, ProfileInlineLimitMax] range.
1434 unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1435 SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
1436 SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
 // With an external inline advisor the size cap is effectively disabled.
1437 if (ExternalInlineAdvisor)
1438 SizeLimit = std::numeric_limits<unsigned>::max();
1439
 // Call sites that were not inlined; filled only when there is no
 // ContextTracker.
1440 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1441
1442 // Perform iterative BFS call site prioritized inlining
1443 bool Changed = false;
1444 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1445 InlineCandidate Candidate = CQueue.top();
1446 CQueue.pop();
1447 CallBase *I = Candidate.CallInstr;
1448 Function *CalledFunction = I->getCalledFunction();
1449
 // Do not inline recursive calls.
1450 if (CalledFunction == &F)
1451 continue;
1452 if (I->isIndirectCall()) {
1453 uint64_t Sum = 0;
1454 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
 // Keep the unscaled sum for the relative hotness check; Sum itself is
 // scaled by this call site's distribution factor.
1455 uint64_t SumOrigin = Sum;
1456 Sum *= Candidate.CallsiteDistribution;
1457 unsigned ICPCount = 0;
1458 for (const auto *FS : CalleeSamples) {
1459 // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1460 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1461 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1462 PSI->getOrCompHotCountThreshold());
1463 continue;
1464 }
1465 uint64_t EntryCountDistributed =
1466 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1467 // In addition to regular inline cost check, we also need to make sure
1468 // ICP isn't introducing excessive speculative checks even if individual
1469 // target looks beneficial to promote and inline. That means we should
1470 // only do ICP when there's a small number dominant targets.
1471 if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1472 EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1473 break;
1474 // TODO: Fix CallAnalyzer to handle all indirect calls.
1475 // For indirect call, we don't run CallAnalyzer to get InlineCost
1476 // before actual inlining. This is because we could see two different
1477 // types from the same definition, which makes CallAnalyzer choke as
1478 // it's expecting matching parameter type on both caller and callee
1479 // side. See example from PR18962 for the triggering cases (the bug was
1480 // fixed, but we generate different types).
1481 if (!PSI->isHotCount(EntryCountDistributed))
1482 break;
1483 SmallVector<CallBase *, 8> InlinedCallSites;
1484 // Attach function profile for promoted indirect callee, and update
1485 // call site count for the promoted inline candidate too.
1486 Candidate = {I, FS, EntryCountDistributed,
1487 Candidate.CallsiteDistribution};
1488 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1489 &InlinedCallSites)) {
 // Queue the call sites exposed by the inlined body.
1490 for (auto *CB : InlinedCallSites) {
1491 if (getInlineCandidate(&NewCandidate, CB))
1492 CQueue.emplace(NewCandidate);
1493 }
1494 ICPCount++;
1495 Changed = true;
1496 } else if (!ContextTracker) {
1497 LocalNotInlinedCallSites.insert({I, FS});
1498 }
1499 }
1500 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1501 !CalledFunction->isDeclaration()) {
 // Direct call to a function that is defined in this module and has a
 // subprogram: try to inline it directly.
1502 SmallVector<CallBase *, 8> InlinedCallSites;
1503 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1504 for (auto *CB : InlinedCallSites) {
1505 if (getInlineCandidate(&NewCandidate, CB))
1506 CQueue.emplace(NewCandidate);
1507 }
1508 Changed = true;
1509 } else if (!ContextTracker) {
1510 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1511 }
1512 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1513 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1514 InlinedGUIDs,
1515 PSI->getOrCompHotCountThreshold());
1516 }
1517 }
1518
 // Candidates are left over, so the loop stopped on the size limit; count
 // which of the three bounds was in effect.
1519 if (!CQueue.empty()) {
1520 if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1521 ++NumCSInlinedHitMaxLimit;
1522 else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1523 ++NumCSInlinedHitMinLimit;
1524 else
1525 ++NumCSInlinedHitGrowthLimit;
1526 }
1527
1528 // For CS profile, profile for not inlined context will be merged when
1529 // base profile is being retrieved.
 // NOTE(review): the condition guarding the next call (listing line 1530) is
 // not visible in this listing.
1531 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1532 return Changed;
1533}
1534
/// Account for the profiles of call sites that were not inlined.
///
/// For each recorded call site whose callee is defined in this module, emit a
/// "NotInline" remark. Then either merge the call site's profile into the
/// callee's outlined profile (when ProfileMergeInlinee is set) or add its head
/// sample estimate to the callee's entry in notInlinedCallInfo.
///
/// \param F the caller named in the remark.
///
/// NOTE(review): the first parameter line (listing line 1536) is not visible
/// in this listing; the body reads it as NonInlinedCallSites, a collection of
/// (call site, callee profile) pairs.
1535void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1537 const Function &F) {
1538 // Accumulate not inlined callsite information into the outlined profiles
 // or into notInlinedCallInfo.
1539 for (const auto &Pair : NonInlinedCallSites) {
1540 CallBase *I = Pair.first;
1541 Function *Callee = I->getCalledFunction();
 // Only callees with a definition in this module are handled.
1542 if (!Callee || Callee->isDeclaration())
1543 continue;
1544
1545 ORE->emit(
1546 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1547 I->getDebugLoc(), I->getParent())
1548 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1549 << "' into '" << ore::NV("Caller", &F) << "'");
1550
1551 ++NumCSNotInlined;
 // NOTE(review): FS is dereferenced without a null check. The priority
 // inliner records Candidate.CalleeSamples here, which getInlineCandidate
 // allows to be null when the external advisor approves the call site -
 // confirm that such an entry cannot reach this point.
1552 const FunctionSamples *FS = Pair.second;
1553 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1554 continue;
1555 }
1556
1557 // Do not merge a context that is already duplicated into the base profile.
1558 if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1559 continue;
1560
1561 if (ProfileMergeInlinee) {
1562 // A function call can be replicated by optimizations like callsite
1563 // splitting or jump threading and the replicates end up sharing the
1564 // sample nested callee profile instead of slicing the original
1565 // inlinee's profile. We want to do merge exactly once by filtering out
1566 // callee profiles with a non-zero head sample count.
1567 if (FS->getHeadSamples() == 0) {
1568 // Use entry samples as head samples during the merge, as inlinees
1569 // don't have head samples.
1570 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1571 FS->getHeadSamplesEstimate());
1572
1573 // Note that we have to do the merge right after processing function.
1574 // This allows OutlineFS's profile to be used for annotation during
1575 // top-down processing of functions' annotation.
1576 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1577 // If outlined function does not exist in the profile, add it to a
1578 // separate map so that it does not rehash the original profile.
 // NOTE(review): the key expression of the map lookup below (listing line
 // 1581) is not visible in this listing.
1579 if (!OutlineFS)
1580 OutlineFS = &OutlineFunctionSamples[
1582 OutlineFS->merge(*FS, 1);
1583 // Set outlined profile to be synthetic to not bias the inliner.
1584 OutlineFS->SetContextSynthetic();
1585 }
1586 } else {
 // Without merging, only sum up the entry counts per callee.
1587 auto pair =
1588 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1589 pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1590 }
1591 }
1592}
1593
1594/// Returns the sorted CallTargetMap \p M by count in descending order.
/// Each target is converted to an InstrProfValueData holding the hash code of
/// the target and its count.
///
/// NOTE(review): the signature and the declaration of R (listing lines
/// 1595-1597) are not visible in this listing.
 // The ordering is the one produced by SampleRecord::SortCallTargets.
1598 for (const auto &I : SampleRecord::SortCallTargets(M)) {
1599 R.emplace_back(
1600 InstrProfValueData{I.first.getHashCode(), I.second});
1601 }
1602 return R;
1603}
1604
1605// Generate MD_prof metadata for every branch instruction using the
1606// edge weights computed during propagation.
1607void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1608 // Generate MD_prof metadata for every branch instruction using the
1609 // edge weights computed during propagation.
1610 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1611 LLVMContext &Ctx = F.getContext();
1612 MDBuilder MDB(Ctx);
1613 for (auto &BI : F) {
1614 BasicBlock *BB = &BI;
1615
1616 if (BlockWeights[BB]) {
1617 for (auto &I : *BB) {
1618 if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1619 continue;
1620 if (!cast<CallBase>(I).getCalledFunction()) {
1621 const DebugLoc &DLoc = I.getDebugLoc();
1622 if (!DLoc)
1623 continue;
1624 const DILocation *DIL = DLoc;
1625 const FunctionSamples *FS = findFunctionSamples(I);
1626 if (!FS)
1627 continue;
1630 FS->findCallTargetMapAt(CallSite);
1631 if (!T || T.get().empty())
1632 continue;
1634 // Prorate the callsite counts based on the pre-ICP distribution
1635 // factor to reflect what is already done to the callsite before
1636 // ICP, such as calliste cloning.
1637 if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
1638 if (Probe->Factor < 1)
1639 T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1640 }
1641 }
1642 SmallVector<InstrProfValueData, 2> SortedCallTargets =
1644 uint64_t Sum = 0;
1645 for (const auto &C : T.get())
1646 Sum += C.second;
1647 // With CSSPGO all indirect call targets are counted torwards the
1648 // original indirect call site in the profile, including both
1649 // inlined and non-inlined targets.
1651 if (const FunctionSamplesMap *M =
1652 FS->findFunctionSamplesMapAt(CallSite)) {
1653 for (const auto &NameFS : *M)
1654 Sum += NameFS.second.getHeadSamplesEstimate();
1655 }
1656 }
1657 if (Sum)
1658 updateIDTMetaData(I, SortedCallTargets, Sum);
1659 else if (OverwriteExistingWeights)
1660 I.setMetadata(LLVMContext::MD_prof, nullptr);
1661 } else if (!isa<IntrinsicInst>(&I)) {
1662 setBranchWeights(I, {static_cast<uint32_t>(BlockWeights[BB])});
1663 }
1664 }
1666 // Set profile metadata (possibly annotated by LTO prelink) to zero or
1667 // clear it for cold code.
1668 for (auto &I : *BB) {
1669 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1670 if (cast<CallBase>(I).isIndirectCall()) {
1671 I.setMetadata(LLVMContext::MD_prof, nullptr);
1672 } else {
1674 }
1675 }
1676 }
1677 }
1678
1679 Instruction *TI = BB->getTerminator();
1680 if (TI->getNumSuccessors() == 1)
1681 continue;
1682 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1683 !isa<IndirectBrInst>(TI))
1684 continue;
1685
1686 DebugLoc BranchLoc = TI->getDebugLoc();
1687 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1688 << ((BranchLoc) ? Twine(BranchLoc.getLine())
1689 : Twine("<UNKNOWN LOCATION>"))
1690 << ".\n");
1692 uint32_t MaxWeight = 0;
1693 Instruction *MaxDestInst;
1694 // Since profi treats multiple edges (multiway branches) as a single edge,
1695 // we need to distribute the computed weight among the branches. We do
1696 // this by evenly splitting the edge weight among destinations.
1698 std::vector<uint64_t> EdgeIndex;
1700 EdgeIndex.resize(TI->getNumSuccessors());
1701 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1702 const BasicBlock *Succ = TI->getSuccessor(I);
1703 EdgeIndex[I] = EdgeMultiplicity[Succ];
1704 EdgeMultiplicity[Succ]++;
1705 }
1706 }
1707 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1708 BasicBlock *Succ = TI->getSuccessor(I);
1709 Edge E = std::make_pair(BB, Succ);
1710 uint64_t Weight = EdgeWeights[E];
1711 LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1712 // Use uint32_t saturated arithmetic to adjust the incoming weights,
1713 // if needed. Sample counts in profiles are 64-bit unsigned values,
1714 // but internally branch weights are expressed as 32-bit values.
1715 if (Weight > std::numeric_limits<uint32_t>::max()) {
1716 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1717 Weight = std::numeric_limits<uint32_t>::max();
1718 }
1719 if (!SampleProfileUseProfi) {
1720 // Weight is added by one to avoid propagation errors introduced by
1721 // 0 weights.
1722 Weights.push_back(static_cast<uint32_t>(Weight + 1));
1723 } else {
1724 // Profi creates proper weights that do not require "+1" adjustments but
1725 // we evenly split the weight among branches with the same destination.
1726 uint64_t W = Weight / EdgeMultiplicity[Succ];
1727 // Rounding up, if needed, so that first branches are hotter.
1728 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1729 W++;
1730 Weights.push_back(static_cast<uint32_t>(W));
1731 }
1732 if (Weight != 0) {
1733 if (Weight > MaxWeight) {
1734 MaxWeight = Weight;
1735 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1736 }
1737 }
1738 }
1739
1740 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1741
1742 uint64_t TempWeight;
1743 // Only set weights if there is at least one non-zero weight.
1744 // In any other case, let the analyzer set weights.
1745 // Do not set weights if the weights are present unless under
1746 // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1747 // twice. If the first annotation already set the weights, the second pass
1748 // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1749 // weight should have their existing metadata (possibly annotated by LTO
1750 // prelink) cleared.
1751 if (MaxWeight > 0 &&
1752 (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1753 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1754 setBranchWeights(*TI, Weights);
1755 ORE->emit([&]() {
1756 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1757 << "most popular destination for conditional branches at "
1758 << ore::NV("CondBranchesLoc", BranchLoc);
1759 });
1760 } else {
1762 TI->setMetadata(LLVMContext::MD_prof, nullptr);
1763 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1764 } else {
1765 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1766 }
1767 }
1768 }
1769}
1770
1771/// Once all the branch weights are computed, we emit the MD_prof
1772/// metadata on BB using the computed values for each of its branches.
1773///
1774/// \param F The function to query.
1775///
1776/// \returns true if \p F was modified. Returns false, otherwise.
1777bool SampleProfileLoader::emitAnnotations(Function &F) {
1778 bool Changed = false;
1779
1781 LLVM_DEBUG({
1782 if (!ProbeManager->getDesc(F))
1783 dbgs() << "Probe descriptor missing for Function " << F.getName()
1784 << "\n";
1785 });
1786
1787 if (ProbeManager->profileIsValid(F, *Samples)) {
1788 ++NumMatchedProfile;
1789 } else {
1790 ++NumMismatchedProfile;
1791 LLVM_DEBUG(
1792 dbgs() << "Profile is invalid due to CFG mismatch for Function "
1793 << F.getName() << "\n");
1795 return false;
1796 }
1797 } else {
1798 if (getFunctionLoc(F) == 0)
1799 return false;
1800
1801 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1802 << F.getName() << ": " << getFunctionLoc(F) << "\n");
1803 }
1804
1805 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1807 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1808 else
1809 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1810
1811 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1812
1813 if (Changed)
1814 generateMDProfMetadata(F);
1815
1816 emitCoverageRemarks(F);
1817 return Changed;
1818}
1819
1820std::unique_ptr<ProfiledCallGraph>
1821SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1822 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1824 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1825 else
1826 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1827
1828 // Add all functions into the profiled call graph even if they are not in
1829 // the profile. This makes sure functions missing from the profile still
1830 // gets a chance to be processed.
1831 for (Function &F : M) {
1833 continue;
1834 ProfiledCG->addProfiledFunction(
1836 }
1837
1838 return ProfiledCG;
1839}
1840
1841std::vector<Function *>
1842SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1843 std::vector<Function *> FunctionOrderList;
1844 FunctionOrderList.reserve(M.size());
1845
1847 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1848 "together with -sample-profile-top-down-load.\n";
1849
1850 if (!ProfileTopDownLoad) {
1851 if (ProfileMergeInlinee) {
1852 // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1853 // because the profile for a function may be used for the profile
1854 // annotation of its outline copy before the profile merging of its
1855 // non-inlined inline instances, and that is not the way how
1856 // ProfileMergeInlinee is supposed to work.
1857 ProfileMergeInlinee = false;
1858 }
1859
1860 for (Function &F : M)
1862 FunctionOrderList.push_back(&F);
1863 return FunctionOrderList;
1864 }
1865
1867 !UseProfiledCallGraph.getNumOccurrences())) {
1868 // Use profiled call edges to augment the top-down order. There are cases
1869 // that the top-down order computed based on the static call graph doesn't
1870 // reflect real execution order. For example
1871 //
1872 // 1. Incomplete static call graph due to unknown indirect call targets.
1873 // Adjusting the order by considering indirect call edges from the
1874 // profile can enable the inlining of indirect call targets by allowing
1875 // the caller processed before them.
1876 // 2. Mutual call edges in an SCC. The static processing order computed for
1877 // an SCC may not reflect the call contexts in the context-sensitive
1878 // profile, thus may cause potential inlining to be overlooked. The
1879 // function order in one SCC is being adjusted to a top-down order based
1880 // on the profile to favor more inlining. This is only a problem with CS
1881 // profile.
1882 // 3. Transitive indirect call edges due to inlining. When a callee function
1883 // (say B) is inlined into a caller function (say A) in LTO prelink,
1884 // every call edge originated from the callee B will be transferred to
1885 // the caller A. If any transferred edge (say A->C) is indirect, the
1886 // original profiled indirect edge B->C, even if considered, would not
1887 // enforce a top-down order from the caller A to the potential indirect
1888 // call target C in LTO postlink since the inlined callee B is gone from
1889 // the static call graph.
1890 // 4. #3 can happen even for direct call targets, due to functions defined
1891 // in header files. A header function (say A), when included into source
1892 // files, is defined multiple times but only one definition survives due
1893 // to ODR. Therefore, the LTO prelink inlining done on those dropped
1894 // definitions can be useless based on a local file scope. More
1895 // importantly, the inlinee (say B), once fully inlined to a
1896 // to-be-dropped A, will have no profile to consume when its outlined
1897 // version is compiled. This can lead to a profile-less prelink
1898 // compilation for the outlined version of B which may be called from
1899 // external modules. while this isn't easy to fix, we rely on the
1900 // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1901 // the A can be inlined in its local scope in prelink, it may not exist
1902 // in the merged IR in postlink, and we'll need the profiled call edges
1903 // to enforce a top-down order for the rest of the functions.
1904 //
1905 // Considering those cases, a profiled call graph completely independent of
1906 // the static call graph is constructed based on profile data, where
1907 // function objects are not even needed to handle case #3 and case 4.
1908 //
1909 // Note that static callgraph edges are completely ignored since they
1910 // can be conflicting with profiled edges for cyclic SCCs and may result in
1911 // an SCC order incompatible with profile-defined one. Using strictly
1912 // profile order ensures a maximum inlining experience. On the other hand,
1913 // static call edges are not so important when they don't correspond to a
1914 // context in the profile.
1915
1916 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1917 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1918 while (!CGI.isAtEnd()) {
1919 auto Range = *CGI;
1920 if (SortProfiledSCC) {
1921 // Sort nodes in one SCC based on callsite hotness.
1923 Range = *SI;
1924 }
1925 for (auto *Node : Range) {
1926 Function *F = SymbolMap.lookup(Node->Name);
1927 if (F && !skipProfileForFunction(*F))
1928 FunctionOrderList.push_back(F);
1929 }
1930 ++CGI;
1931 }
1932 } else {
1933 CG.buildRefSCCs();
1934 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
1935 for (LazyCallGraph::SCC &C : RC) {
1936 for (LazyCallGraph::Node &N : C) {
1937 Function &F = N.getFunction();
1939 FunctionOrderList.push_back(&F);
1940 }
1941 }
1942 }
1943 }
1944
1945 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1946
1947 LLVM_DEBUG({
1948 dbgs() << "Function processing order:\n";
1949 for (auto F : FunctionOrderList) {
1950 dbgs() << F->getName() << "\n";
1951 }
1952 });
1953
1954 return FunctionOrderList;
1955}
1956
1957bool SampleProfileLoader::doInitialization(Module &M,
1959 auto &Ctx = M.getContext();
1960
1961 auto ReaderOrErr = SampleProfileReader::create(
1962 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1963 if (std::error_code EC = ReaderOrErr.getError()) {
1964 std::string Msg = "Could not open profile: " + EC.message();
1965 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1966 return false;
1967 }
1968 Reader = std::move(ReaderOrErr.get());
1969 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1970 // set module before reading the profile so reader may be able to only
1971 // read the function profiles which are used by the current module.
1972 Reader->setModule(&M);
1973 if (std::error_code EC = Reader->read()) {
1974 std::string Msg = "profile reading failed: " + EC.message();
1975 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1976 return false;
1977 }
1978
1979 PSL = Reader->getProfileSymbolList();
1980
1981 // While profile-sample-accurate is on, ignore symbol list.
1982 ProfAccForSymsInList =
1984 if (ProfAccForSymsInList) {
1985 NamesInProfile.clear();
1986 GUIDsInProfile.clear();
1987 if (auto NameTable = Reader->getNameTable()) {
1989 for (auto Name : *NameTable)
1990 GUIDsInProfile.insert(Name.getHashCode());
1991 } else {
1992 for (auto Name : *NameTable)
1993 NamesInProfile.insert(Name.stringRef());
1994 }
1995 }
1996 CoverageTracker.setProfAccForSymsInList(true);
1997 }
1998
1999 if (FAM && !ProfileInlineReplayFile.empty()) {
2000 ExternalInlineAdvisor = getReplayInlineAdvisor(
2001 M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2006 /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2007 }
2008
2009 // Apply tweaks if context-sensitive or probe-based profile is available.
2010 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2011 Reader->profileIsProbeBased()) {
2012 if (!UseIterativeBFIInference.getNumOccurrences())
2014 if (!SampleProfileUseProfi.getNumOccurrences())
2015 SampleProfileUseProfi = true;
2016 if (!EnableExtTspBlockPlacement.getNumOccurrences())
2018 // Enable priority-base inliner and size inline by default for CSSPGO.
2019 if (!ProfileSizeInline.getNumOccurrences())
2020 ProfileSizeInline = true;
2021 if (!CallsitePrioritizedInline.getNumOccurrences())
2023 // For CSSPGO, we also allow recursive inline to best use context profile.
2024 if (!AllowRecursiveInline.getNumOccurrences())
2025 AllowRecursiveInline = true;
2026
2027 if (Reader->profileIsPreInlined()) {
2028 if (!UsePreInlinerDecision.getNumOccurrences())
2029 UsePreInlinerDecision = true;
2030 }
2031
2032 // Enable stale profile matching by default for probe-based profile.
2033 // Currently the matching relies on if the checksum mismatch is detected,
2034 // which is currently only available for pseudo-probe mode. Removing the
2035 // checksum check could cause regressions for some cases, so further tuning
2036 // might be needed if we want to enable it for all cases.
2037 if (Reader->profileIsProbeBased() &&
2038 !SalvageStaleProfile.getNumOccurrences()) {
2039 SalvageStaleProfile = true;
2040 }
2041
2042 if (!Reader->profileIsCS()) {
2043 // Non-CS profile should be fine without a function size budget for the
2044 // inliner since the contexts in the profile are either all from inlining
2045 // in the prevoius build or pre-computed by the preinliner with a size
2046 // cap, thus they are bounded.
2047 if (!ProfileInlineLimitMin.getNumOccurrences())
2048 ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
2049 if (!ProfileInlineLimitMax.getNumOccurrences())
2050 ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
2051 }
2052 }
2053
2054 if (Reader->profileIsCS()) {
2055 // Tracker for profiles under different context
2056 ContextTracker = std::make_unique<SampleContextTracker>(
2057 Reader->getProfiles(), &GUIDToFuncNameMap);
2058 }
2059
2060 // Load pseudo probe descriptors for probe-based function samples.
2061 if (Reader->profileIsProbeBased()) {
2062 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2063 if (!ProbeManager->moduleIsProbed(M)) {
2064 const char *Msg =
2065 "Pseudo-probe-based profile requires SampleProfileProbePass";
2066 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2067 DS_Warning));
2068 return false;
2069 }
2070 }
2071
2074 MatchingManager = std::make_unique<SampleProfileMatcher>(
2075 M, *Reader, ProbeManager.get(), LTOPhase);
2076 }
2077
2078 return true;
2079}
2080
2081// Note that this is a module-level check. Even if one module is errored out,
2082// the entire build will be errored out. However, the user could make big
2083// changes to functions in single module but those changes might not be
2084// performance significant to the whole binary. Therefore, to avoid those false
2085// positives, we select a reasonable big set of hot functions that are supposed
2086// to be globally performance significant, only compute and check the mismatch
2087// within those functions. The function selection is based on two criteria:
2088// 1) The function is hot enough, which is tuned by a hotness-based
2089// flag(HotFuncCutoffForStalenessError). 2) The num of function is large enough
2090// which is tuned by the MinfuncsForStalenessError flag.
2091bool SampleProfileLoader::rejectHighStalenessProfile(
2092 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
2094 "Only support for probe-based profile");
2095 uint64_t TotalHotFunc = 0;
2096 uint64_t NumMismatchedFunc = 0;
2097 for (const auto &I : Profiles) {
2098 const auto &FS = I.second;
2099 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
2100 if (!FuncDesc)
2101 continue;
2102
2103 // Use a hotness-based threshold to control the function selection.
2105 FS.getTotalSamples()))
2106 continue;
2107
2108 TotalHotFunc++;
2109 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2110 NumMismatchedFunc++;
2111 }
2112 // Make sure that the num of selected function is not too small to distinguish
2113 // from the user's benign changes.
2114 if (TotalHotFunc < MinfuncsForStalenessError)
2115 return false;
2116
2117 // Finally check the mismatch percentage against the threshold.
2118 if (NumMismatchedFunc * 100 >=
2119 TotalHotFunc * PrecentMismatchForStalenessError) {
2120 auto &Ctx = M.getContext();
2121 const char *Msg =
2122 "The input profile significantly mismatches current source code. "
2123 "Please recollect profile to avoid performance regression.";
2124 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
2125 return true;
2126 }
2127 return false;
2128}
2129
2130bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2131 ProfileSummaryInfo *_PSI,
2132 LazyCallGraph &CG) {
2133 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2134
2135 PSI = _PSI;
2136 if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2137 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2139 PSI->refresh();
2140 }
2141
2143 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2144 return false;
2145
2146 // Compute the total number of samples collected in this profile.
2147 for (const auto &I : Reader->getProfiles())
2148 TotalCollectedSamples += I.second.getTotalSamples();
2149
2150 auto Remapper = Reader->getRemapper();
2151 // Populate the symbol map.
2152 for (const auto &N_F : M.getValueSymbolTable()) {
2153 StringRef OrigName = N_F.getKey();
2154 Function *F = dyn_cast<Function>(N_F.getValue());
2155 if (F == nullptr || OrigName.empty())
2156 continue;
2157 SymbolMap[FunctionId(OrigName)] = F;
2159 if (OrigName != NewName && !NewName.empty()) {
2160 auto r = SymbolMap.emplace(FunctionId(NewName), F);
2161 // Failiing to insert means there is already an entry in SymbolMap,
2162 // thus there are multiple functions that are mapped to the same
2163 // stripped name. In this case of name conflicting, set the value
2164 // to nullptr to avoid confusion.
2165 if (!r.second)
2166 r.first->second = nullptr;
2167 OrigName = NewName;
2168 }
2169 // Insert the remapped names into SymbolMap.
2170 if (Remapper) {
2171 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2172 if (*MapName != OrigName && !MapName->empty())
2173 SymbolMap.emplace(FunctionId(*MapName), F);
2174 }
2175 }
2176 }
2177 assert(SymbolMap.count(FunctionId()) == 0 &&
2178 "No empty StringRef should be added in SymbolMap");
2179
2182 MatchingManager->runOnModule();
2183 MatchingManager->clearMatchingData();
2184 }
2185
2186 bool retval = false;
2187 for (auto *F : buildFunctionOrder(M, CG)) {
2188 assert(!F->isDeclaration());
2189 clearFunctionData();
2190 retval |= runOnFunction(*F, AM);
2191 }
2192
2193 // Account for cold calls not inlined....
2195 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2196 notInlinedCallInfo)
2197 updateProfileCallee(pair.first, pair.second.entryCount);
2198
2199 return retval;
2200}
2201
2202bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
2203 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2204 DILocation2SampleMap.clear();
2205 // By default the entry count is initialized to -1, which will be treated
2206 // conservatively by getEntryCount as the same as unknown (None). This is
2207 // to avoid newly added code to be treated as cold. If we have samples
2208 // this will be overwritten in emitAnnotations.
2209 uint64_t initialEntryCount = -1;
2210
2211 ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2212 if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2213 // initialize all the function entry counts to 0. It means all the
2214 // functions without profile will be regarded as cold.
2215 initialEntryCount = 0;
2216 // profile-sample-accurate is a user assertion which has a higher precedence
2217 // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2218 ProfAccForSymsInList = false;
2219 }
2220 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2221
2222 // PSL -- profile symbol list include all the symbols in sampled binary.
2223 // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2224 // old functions without samples being cold, without having to worry
2225 // about new and hot functions being mistakenly treated as cold.
2226 if (ProfAccForSymsInList) {
2227 // Initialize the entry count to 0 for functions in the list.
2228 if (PSL->contains(F.getName()))
2229 initialEntryCount = 0;
2230
2231 // Function in the symbol list but without sample will be regarded as
2232 // cold. To minimize the potential negative performance impact it could
2233 // have, we want to be a little conservative here saying if a function
2234 // shows up in the profile, no matter as outline function, inline instance
2235 // or call targets, treat the function as not being cold. This will handle
2236 // the cases such as most callsites of a function are inlined in sampled
2237 // binary but not inlined in current build (because of source code drift,
2238 // imprecise debug information, or the callsites are all cold individually
2239 // but not cold accumulatively...), so the outline function showing up as
2240 // cold in sampled binary will actually not be cold after current build.
2243 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2244 (!FunctionSamples::UseMD5 && NamesInProfile.count(CanonName)))
2245 initialEntryCount = -1;
2246 }
2247
2248 // Initialize entry count when the function has no existing entry
2249 // count value.
2250 if (!F.getEntryCount())
2251 F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
2252 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2253 if (AM) {
2254 auto &FAM =
2256 .getManager();
2258 } else {
2259 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2260 ORE = OwnedORE.get();
2261 }
2262
2264 Samples = ContextTracker->getBaseSamplesFor(F);
2265 else {
2266 Samples = Reader->getSamplesFor(F);
2267 // Try search in previously inlined functions that were split or duplicated
2268 // into base.
2269 if (!Samples) {
2271 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2272 if (It != OutlineFunctionSamples.end()) {
2273 Samples = &It->second;
2274 } else if (auto Remapper = Reader->getRemapper()) {
2275 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2276 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2277 if (It != OutlineFunctionSamples.end())
2278 Samples = &It->second;
2279 }
2280 }
2281 }
2282 }
2283
2284 if (Samples && !Samples->empty())
2285 return emitAnnotations(F);
2286 return false;
2287}
2289 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2291 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2292 LTOPhase(LTOPhase), FS(std::move(FS)) {}
2293
2298
2299 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2301 };
2302 auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2304 };
2305 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2307 };
2308
2309 if (!FS)
2311
2312 SampleProfileLoader SampleLoader(
2313 ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2314 ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2315 : ProfileRemappingFileName,
2316 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2317
2318 if (!SampleLoader.doInitialization(M, &FAM))
2319 return PreservedAnalyses::all();
2320
2323 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
2324 return PreservedAnalyses::all();
2325
2326 return PreservedAnalyses::none();
2327}
This file defines the StringMap class.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
LVReader * CurrentReader
Definition: LVReader.cpp:153
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:81
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
#define CSINLINE_DEBUG
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
#define DEBUG_TYPE
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
Definition: InstrTypes.h:1742
This class represents a function call, abstracting a target machine's calling convention.
Debug location.
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Represents either an error or a value T.
Definition: ErrorOr.h:56
Class to represent profile counts.
Definition: Function.h:278
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1831
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:281
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Represents the cost of inlining a function.
Definition: InlineCost.h:90
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:131
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:126
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition: InlineCost.h:120
This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:202
InlineResult is basically true or false.
Definition: InlineCost.h:180
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR unit.
Definition: PassManager.h:631
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1745
const BasicBlock * getParent() const
Definition: Instruction.h:152
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCountedBase.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:110
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
Diagnostic information for applied optimization remarks.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominator tree.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:28
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void refresh()
If no summary is present, attempt to refresh.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
LLVM Value Representation.
Definition: Value.h:74
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:744
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:1036
FunctionId getFunction() const
Return the function name.
Definition: SampleProf.h:1069
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1085
SampleContext & getContext() const
Definition: SampleProf.h:1185
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:996
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:947
uint64_t getGUID() const
Return the GUID of the context's name.
Definition: SampleProf.h:1204
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:971
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1190
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
Definition: HashKeyMap.h:53
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:607
This class provides operator overloads to the map container using MD5 as the key type,...
Definition: SampleProf.h:1306
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
Definition: SampleProf.h:338
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:406
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:415
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:113
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:253
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:206
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:121
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represent the string.
Definition: SampleProf.h:1292
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:734
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1346
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:76
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1229
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and shouldn't be promoted again.
Definition: Metadata.h:57
cl::opt< int > ProfileInlineGrowthLimit
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:59
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:206
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:239
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:233