LLVM 23.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileLoader transformation. This pass
10// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12// profile information in the given profile.
13//
14// This pass generates branch weight annotations on the IR:
15//
16// - prof: Represents branch weights. This annotation is added to branches
17// to indicate the weights of each edge coming out of the branch.
18// The weight of each edge is the weight of the target block for
19// that edge. The weight of a block B is computed as the maximum
20// number of samples found in B.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/ADT/StringRef.h"
34#include "llvm/ADT/Twine.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/DebugLoc.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalValue.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
54#include "llvm/IR/LLVMContext.h"
55#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/PassManager.h"
59#include "llvm/IR/PseudoProbe.h"
66#include "llvm/Support/Debug.h"
70#include "llvm/Transforms/IPO.h"
81#include <algorithm>
82#include <cassert>
83#include <cstdint>
84#include <functional>
85#include <limits>
86#include <memory>
87#include <queue>
88#include <string>
89#include <system_error>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace sampleprof;
95using namespace llvm::sampleprofutil;
96#define DEBUG_TYPE "sample-profile"
97#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
98
// Pass statistics. Each STATISTIC invocation pairs a counter name with the
// human-readable description string that follows it.
99STATISTIC(NumCSInlined,
100 "Number of functions inlined with context sensitive profile");
101STATISTIC(NumCSNotInlined,
102 "Number of functions not inlined with context sensitive profile");
103STATISTIC(NumMismatchedProfile,
104 "Number of functions with CFG mismatched profile");
105STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
106STATISTIC(NumDuplicatedInlinesite,
107 "Number of inlined callsites with a partial distribution factor");
108
// Counters for the reasons FDO inlining was stopped by a size limit.
109STATISTIC(NumCSInlinedHitMinLimit,
110 "Number of functions with FDO inline stopped due to min size limit");
111STATISTIC(NumCSInlinedHitMaxLimit,
112 "Number of functions with FDO inline stopped due to max size limit");
// NOTE(review): the line that opens this invocation (listing line 113,
// presumably `STATISTIC(`) is absent from this listing -- restore it from the
// original file.
114 NumCSInlinedHitGrowthLimit,
115 "Number of functions with FDO inline stopped due to growth size limit");
116
117namespace llvm {
118
119// Command line option to specify the file to read samples from. This is
120// mainly used for debugging.
122 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
123 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
124
125// The named file contains a set of transformations that may have been applied
126// to the symbol names between the program from which the sample data was
127// collected and the current program's symbols.
129 "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
130 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
131
133 "salvage-stale-profile", cl::Hidden, cl::init(false),
134 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
137 SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
138 cl::desc("Salvage unused profile by matching with new "
139 "functions on call graph."));
140
142 "report-profile-staleness", cl::Hidden, cl::init(false),
143 cl::desc("Compute and report stale profile statistical metrics."));
144
146 "persist-profile-staleness", cl::Hidden, cl::init(false),
147 cl::desc("Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
149
151 "profile-sample-accurate", cl::Hidden, cl::init(false),
152 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
155
157 "profile-sample-block-accurate", cl::Hidden, cl::init(false),
158 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
161
163 "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
164 cl::desc("For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overridden by profile-sample-accurate. "));
166
168 "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
169 cl::desc("Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
172 "enabled. "));
173
175 "sample-profile-top-down-load", cl::Hidden, cl::init(true),
176 cl::desc("Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
179
180static cl::opt<bool>
181 UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
182 cl::desc("Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
185
187 "sample-profile-inline-size", cl::Hidden, cl::init(false),
188 cl::desc("Inline cold call sites in profile loader if it's beneficial "
189 "for code size."));
190
191// Since profiles are consumed by many passes, turning on this option has
192// side effects. For instance, pre-link SCC inliner would see merged profiles
193// and inline the hot functions (that are skipped in this pass).
195 "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
196 cl::desc(
197 "If true, artificially skip inline transformation in sample-loader "
198 "pass, and merge (or scale) profiles (as configured by "
199 "--sample-profile-merge-inlinee)."));
200
202 SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
203 cl::desc("Sort profiled recursion by edge weights."));
204
206 "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
207 cl::desc("The size growth ratio limit for proirity-based sample profile "
208 "loader inlining."));
209
211 "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
212 cl::desc("The lower bound of size growth limit for "
213 "proirity-based sample profile loader inlining."));
214
216 "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
217 cl::desc("The upper bound of size growth limit for "
218 "proirity-based sample profile loader inlining."));
219
221 "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
222 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
223 "inlining."));
224
226 "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
227 cl::desc("Threshold for inlining cold callsites"));
228} // namespace llvm
229
231 "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
232 cl::desc(
233 "Relative hotness percentage threshold for indirect "
234 "call promotion in proirity-based sample profile loader inlining."));
235
237 "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
238 cl::desc(
239 "Skip relative hotness check for ICP up to given number of targets."));
240
242 "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
243 cl::desc("A function is considered hot for staleness error check if its "
244 "total sample count is above the specified percentile"));
245
247 "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
248 cl::desc("Skip the check if the number of hot functions is smaller than "
249 "the specified number."));
250
252 "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
253 cl::desc("Reject the profile if the mismatch percent is higher than the "
254 "given number."));
255
257 "sample-profile-prioritized-inline", cl::Hidden,
258 cl::desc("Use call site prioritized inlining for sample profile loader. "
259 "Currently only CSSPGO is supported."));
260
262 "sample-profile-use-preinliner", cl::Hidden,
263 cl::desc("Use the preinliner decisions stored in profile context."));
264
266 "sample-profile-recursive-inline", cl::Hidden,
267 cl::desc("Allow sample loader inliner to inline recursive calls."));
268
270 "sample-profile-remove-probe", cl::Hidden, cl::init(false),
271 cl::desc("Remove pseudo-probe after sample profile annotation."));
272
274 "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
275 cl::desc(
276 "Optimization remarks file containing inline remarks to be replayed "
277 "by inlining from sample profile loader."),
278 cl::Hidden);
279
281 "sample-profile-inline-replay-scope",
284 "Replay on functions that have remarks associated "
285 "with them (default)"),
287 "Replay on the entire module")),
288 cl::desc("Whether inline replay should be applied to the entire "
289 "Module or just the Functions (default) that are present as "
290 "callers in remarks during sample profile inlining."),
291 cl::Hidden);
292
294 "sample-profile-inline-replay-fallback",
299 "All decisions not in replay send to original advisor (default)"),
301 "AlwaysInline", "All decisions not in replay are inlined"),
303 "All decisions not in replay are not inlined")),
304 cl::desc("How sample profile inline replay treats sites that don't come "
305 "from the replay. Original: defers to original advisor, "
306 "AlwaysInline: inline all sites not in replay, NeverInline: "
307 "inline no sites not in replay"),
308 cl::Hidden);
309
311 "sample-profile-inline-replay-format",
314 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
316 "<Line Number>:<Column Number>"),
318 "LineDiscriminator", "<Line Number>.<Discriminator>"),
320 "LineColumnDiscriminator",
321 "<Line Number>:<Column Number>.<Discriminator> (default)")),
322 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
323
325 MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
326 cl::desc("Max number of promotions for a single indirect "
327 "call callsite in sample profile loader"));
328
330 "overwrite-existing-weights", cl::Hidden, cl::init(false),
331 cl::desc("Ignore existing branch weights on IR and always overwrite."));
332
334 "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
335 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
336 "sample-profile inline pass name."));
337
338namespace llvm {
340}
341
342namespace {
343
// Shorthand container types used throughout this file.
// Maps a basic block to a 64-bit weight.
344using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Maps a basic block to another basic block (by its name, the representative
// of the block's equivalence class -- TODO confirm against the users).
345using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// A CFG edge expressed as a (block, block) pair.
346using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Maps an Edge to a 64-bit weight.
347using EdgeWeightMap = DenseMap<Edge, uint64_t>;
// NOTE(review): the right-hand side of this alias (listing line 349) is absent
// from this listing -- restore it from the original file.
348using BlockEdgeMap =
350
// Scoped helper that installs a GUID -> function-name map on every
// FunctionSamples known to a profile reader.
//
// When the reader reports useMD5(), the constructor fills the caller-provided
// map with one entry per function in the module and points every
// FunctionSamples (top-level and nested callsite samples) at that map. The
// destructor clears the map and resets those pointers to nullptr. When
// useMD5() is false both the constructor and the destructor do nothing.
351class GUIDToFuncNameMapper {
352public:
 // \param M                 Module whose functions are enumerated.
 // \param Reader            Profile reader whose FunctionSamples are updated.
 // \param GUIDToFuncNameMap Map to populate; held by reference, so it must
 //                          outlive this object.
353 GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
354 DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
355 : CurrentReader(Reader), CurrentModule(M),
356 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
357 if (!CurrentReader.useMD5())
358 return;
359
360 for (const auto &F : CurrentModule) {
361 StringRef OrigName = F.getName();
362 CurrentGUIDToFuncNameMap.insert(
363 {Function::getGUIDAssumingExternalLinkage(OrigName), OrigName});
364
365 // Local to global var promotion used by optimization like thinlto
366 // will rename the var and add suffix like ".llvm.xxx" to the
367 // original local name. In sample profile, the suffixes of function
368 // names are all stripped. Since it is possible that the mapper is
369 // built in post-thin-link phase and var promotion has been done,
370 // we need to add the substring of function name without the suffix
371 // into the GUIDToFuncNameMap.
 // NOTE(review): the statement defining CanonName (listing line 372) is
 // absent from this listing; presumably it is OrigName with its suffix
 // stripped, as the comment above describes -- restore it from the
 // original file.
373 if (CanonName != OrigName)
374 CurrentGUIDToFuncNameMap.insert(
375 {Function::getGUIDAssumingExternalLinkage(CanonName), CanonName});
376 }
377
378 // Update GUIDToFuncNameMap for each function including inlinees.
379 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
380 }
381
 // Undoes the constructor's work: empties the map and detaches it from every
 // FunctionSamples so none of them keeps pointing at cleared data.
382 ~GUIDToFuncNameMapper() {
383 if (!CurrentReader.useMD5())
384 return;
385
386 CurrentGUIDToFuncNameMap.clear();
387
388 // Reset GUIDToFuncNameMap for each function as they're no
389 // longer valid at this point.
390 SetGUIDToFuncNameMapForAll(nullptr);
391 }
392
393private:
 // Assigns \p Map (which may be nullptr) to the GUIDToFuncNameMap field of
 // every FunctionSamples reachable from the reader's profiles. A FIFO
 // worklist is seeded with the top-level profiles and then extended with the
 // samples nested under each entry's callsite samples.
394 void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
395 std::queue<FunctionSamples *> FSToUpdate;
396 for (auto &IFS : CurrentReader.getProfiles()) {
397 FSToUpdate.push(&IFS.second);
398 }
399
400 while (!FSToUpdate.empty()) {
401 FunctionSamples *FS = FSToUpdate.front();
402 FSToUpdate.pop();
403 FS->GUIDToFuncNameMap = Map;
404 for (const auto &ICS : FS->getCallsiteSamples()) {
405 const FunctionSamplesMap &FSMap = ICS.second;
406 for (const auto &IFS : FSMap) {
 // The callsite samples are reached through a const accessor; constness
 // is cast away so the nested entry can be updated as well. Note this
 // inner FS shadows the outer FS pointer.
407 FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
408 FSToUpdate.push(&FS);
409 }
410 }
411 }
412 }
413
 // NOTE(review): the declaration of the CurrentReader member (listing line
 // 414) is absent from this listing; the constructor initializes it from a
 // SampleProfileReader & -- restore it from the original file.
415 Module &CurrentModule;
416 DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
417};
418
419// Inline candidate used by iterative callsite prioritized inliner
// Plain aggregate; member order is part of its interface for any code that
// brace-initializes it, so fields must not be reordered.
420struct InlineCandidate {
 // The call instruction being considered.
421 CallBase *CallInstr;
 // Profile of the callee at this call site. May be null: the comparer in this
 // file notes that this happens in inline replay mode.
422 const FunctionSamples *CalleeSamples;
423 // Prorated callsite count, which will be used to guide inlining. For example,
424 // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
425 // copies will get their own distribution factors and their prorated counts
426 // will be used to decide if they should be inlined independently.
427 uint64_t CallsiteCount;
428 // Call site distribution factor to prorate the profile samples for a
429 // duplicated callsite. Default value is 1.0.
 // NOTE(review): no default member initializer is present here, so the 1.0
 // default must be supplied by whoever constructs the candidate -- confirm.
430 float CallsiteDistribution;
431};
432
433// Inline candidate comparer using call site weight
434struct CandidateComparer {
435 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
436 if (LHS.CallsiteCount != RHS.CallsiteCount)
437 return LHS.CallsiteCount < RHS.CallsiteCount;
438
439 const FunctionSamples *LCS = LHS.CalleeSamples;
440 const FunctionSamples *RCS = RHS.CalleeSamples;
441 // In inline replay mode, CalleeSamples may be null and the order doesn't
442 // matter.
443 if (!LCS || !RCS)
444 return LCS;
445
446 // Tie breaker using number of samples try to favor smaller functions first
447 if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
448 return LCS->getBodySamples().size() > RCS->getBodySamples().size();
449
450 // Tie breaker using GUID so we have stable/deterministic inlining order
451 return LCS->getGUID() < RCS->getGUID();
452 }
453};
454
455using CandidateQueue =
457 CandidateComparer>;
458
459/// Sample profile pass.
460///
461/// This pass reads profile data from the file specified by
462/// -sample-profile-file and annotates every affected function with the
463/// profile information found in that file.
464class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
465public:
 /// Stores the analysis getters, the call graph reference, the LTO phase and
 /// the two boolean switches; \p Name, \p RemapName and \p FS are forwarded
 /// to the base class. No profile is read here.
466 SampleProfileLoader(
467 StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
468 IntrusiveRefCntPtr<vfs::FileSystem> FS,
469 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
470 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
471 std::function<const TargetLibraryInfo &(Function &)> GetTLI,
472 LazyCallGraph &CG, bool DisableSampleProfileInlining,
473 bool UseFlattenedProfile)
474 : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
475 std::move(FS)),
476 GetAC(std::move(GetAssumptionCache)),
477 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
478 CG(CG), LTOPhase(LTOPhase),
479 AnnotatedPassName(AnnotateSampleProfileInlinePhase
480 ? llvm::AnnotateInlinePassName(InlineContext{
 // NOTE(review): the remainder of the AnnotatedPassName initializer (listing
 // lines 481-482) is absent from this listing -- restore it from the
 // original file.
483 DisableSampleProfileInlining(DisableSampleProfileInlining),
484 UseFlattenedProfile(UseFlattenedProfile) {}
485
486 bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
487 bool runOnModule(Module &M, ModuleAnalysisManager &AM,
488 ProfileSummaryInfo *_PSI);
489
490protected:
491 bool runOnFunction(Function &F, ModuleAnalysisManager &AM);
492 bool emitAnnotations(Function &F);
493 ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
494 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
495 const FunctionSamples *
496 findFunctionSamples(const Instruction &I) const override;
497 std::vector<const FunctionSamples *>
498 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
499 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
500 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
501 uint64_t Threshold);
502 // Attempt to promote indirect call and also inline the promoted call
503 bool tryPromoteAndInlineCandidate(
504 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
505 uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
506
507 bool inlineHotFunctions(Function &F,
508 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
509 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
510 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
511 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
512 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
513 bool
514 tryInlineCandidate(InlineCandidate &Candidate,
515 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
516 bool
517 inlineHotFunctionsWithPriority(Function &F,
518 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
519 // Inline cold/small functions in addition to hot ones
520 bool shouldInlineColdCallee(CallBase &CallInst);
521 void emitOptimizationRemarksForInlineCandidates(
522 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
523 bool Hot);
 // Note: NonInlinedCallSites is taken by value, so the MapVector is copied
 // at each call unless the caller moves it in.
524 void promoteMergeNotInlinedContextSamples(
525 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
526 const Function &F);
527 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
528 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
529 void generateMDProfMetadata(Function &F);
530 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
531 const SampleProfileMap &Profiles);
532 void removePseudoProbeInstsDiscriminator(Module &M);
533
534 /// Map from function name to Function *. Used to find the function from
535 /// the function name. If the function name contains suffix, additional
536 /// entry is added to map from the stripped name to the function if there
537 /// is one-to-one mapping.
538 HashKeyMap<DenseMap, FunctionId, Function *> SymbolMap;
539
540 /// Map from function name to profile name generated by call-graph based
541 /// profile fuzzy matching(--salvage-unused-profile).
542 HashKeyMap<DenseMap, FunctionId, FunctionId> FuncNameToProfNameMap;
543
 // Per-function analysis getters supplied to the constructor.
544 std::function<AssumptionCache &(Function &)> GetAC;
545 std::function<TargetTransformInfo &(Function &)> GetTTI;
546 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
 // Held by reference; the call graph must outlive this loader.
547 LazyCallGraph &CG;
548
549 /// Profile tracker for different context.
550 std::unique_ptr<SampleContextTracker> ContextTracker;
551
552 /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
553 ///
554 /// We need to know the LTO phase because for example in ThinLTOPrelink
555 /// phase, in annotation, we should not promote indirect calls. Instead,
556 /// we will mark GUIDs that needs to be annotated to the function.
557 const ThinOrFullLTOPhase LTOPhase;
 // Pass name returned by getAnnotatedRemarkPassName(); computed once in the
 // constructor.
558 const std::string AnnotatedPassName;
559
560 /// Profile symbol list tells whether a function name appears in the binary
561 /// used to generate the current profile.
562 std::shared_ptr<ProfileSymbolList> PSL;
563
564 // Information recorded when we declined to inline a call site
565 // because we have determined it is too cold is accumulated for
566 // each callee function. Initially this is just the entry count.
567 struct NotInlinedProfileInfo {
568 uint64_t entryCount;
569 };
570 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
571
572 // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
573 // all the function symbols defined or declared in current module.
574 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
575
576 // All the Names used in FunctionSamples including outline function
577 // names, inline instance names and call target names.
578 StringSet<> NamesInProfile;
579 // MD5 version of NamesInProfile. Either NamesInProfile or GUIDsInProfile is
580 // populated, depends on whether the profile uses MD5. Because the name table
581 // generally contains several magnitude more entries than the number of
582 // functions, we do not want to convert all names from one form to another.
583 llvm::DenseSet<uint64_t> GUIDsInProfile;
584
585 // For symbol in profile symbol list, whether to regard their profiles
586 // to be accurate. It is mainly decided by existence of profile symbol
587 // list and -profile-accurate-for-symsinlist flag, but it can be
588 // overridden by -profile-sample-accurate or profile-sample-accurate
589 // attribute.
 // NOTE(review): not initialized by the constructor shown above; presumably
 // assigned during initialization before it is read -- confirm.
590 bool ProfAccForSymsInList;
591
592 bool DisableSampleProfileInlining;
593
594 bool UseFlattenedProfile;
595
596 // External inline advisor used to replay inline decision from remarks.
597 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
598
599 // A helper to implement the sample profile matching algorithm.
600 std::unique_ptr<SampleProfileMatcher> MatchingManager;
601
602private:
 // Returns a pointer into AnnotatedPassName; valid for the lifetime of this
 // object because that member is const.
603 const char *getAnnotatedRemarkPassName() const {
604 return AnnotatedPassName.c_str();
605 }
606};
607} // end anonymous namespace
608
609namespace llvm {
610template <>
611inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) {
612 return succ_empty(BB);
613}
614
615template <>
616inline void SampleProfileInference<Function>::findUnlikelyJumps(
617 const std::vector<const BasicBlockT *> &BasicBlocks,
618 BlockEdgeMap &Successors, FlowFunction &Func) {
619 for (auto &Jump : Func.Jumps) {
620 const auto *BB = BasicBlocks[Jump.Source];
621 const auto *Succ = BasicBlocks[Jump.Target];
622 const Instruction *TI = BB->getTerminator();
623 // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
624 // In that case block Succ should be a landing pad
625 const auto &Succs = Successors[BB];
626 if (Succs.size() == 2 && Succs.back() == Succ) {
627 if (isa<InvokeInst>(TI)) {
628 Jump.IsUnlikely = true;
629 }
630 }
631 const Instruction *SuccTI = Succ->getTerminator();
632 // Check if the target block contains UnreachableInst and mark it unlikely
633 if (SuccTI->getNumSuccessors() == 0) {
634 if (isa<UnreachableInst>(SuccTI)) {
635 Jump.IsUnlikely = true;
636 }
637 }
638 }
639}
640
// Rebuilds the dominator tree, post-dominator tree and loop info held in the
// DT, PDT and LI members for function F. Each member is reset to a freshly
// allocated object, so any previously held analysis is released.
// NOTE(review): the line carrying the return type and qualified function name
// (listing line 642) is absent from this listing -- restore it from the
// original file.
641template <>
643 Function &F) {
644 DT.reset(new DominatorTree);
645 DT->recalculate(F);
646
647 PDT.reset(new PostDominatorTree(F));
648
 // Loop info is derived from the dominator tree computed above, so it must
 // be built after DT.
649 LI.reset(new LoopInfo);
650 LI->analyze(*DT);
651}
652} // namespace llvm
653
// Returns the sample weight for \p Inst, or an error code when the
// instruction should not be annotated.
//
// As far as this listing shows: an instruction with no debug location yields
// a default-constructed std::error_code; a direct call for which
// findCalleeFunctionSamples() finds samples yields 0; everything else is
// delegated to getInstWeightImpl(). Three guarding conditions are missing
// from this listing (see the notes below).
654ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
 // NOTE(review): the condition guarding this return (listing line 655) is
 // absent from this listing; presumably it selects the pseudo-probe based
 // path -- restore it from the original file.
656 return getProbeWeight(Inst);
657
658 const DebugLoc &DLoc = Inst.getDebugLoc();
659 if (!DLoc)
660 return std::error_code();
661
662 // Ignore all intrinsics, phinodes and branch instructions.
663 // Branch and phinodes instruction usually contains debug info from sources
664 // outside of the residing basic block, thus we ignore them during annotation.
 // NOTE(review): the condition guarding this return (listing line 665) is
 // absent from this listing -- restore it from the original file.
666 return std::error_code();
667
668 // For non-CS profile, if a direct call/invoke instruction is inlined in
669 // profile (findCalleeFunctionSamples returns non-empty result), but not
670 // inlined here, it means that the inlined callsite has no sample, thus the
671 // call instruction should have 0 count.
672 // For CS profile, the callsite count of previously inlined callees is
673 // populated with the entry count of the callees.
 // NOTE(review): the condition that opens this check (listing line 674) is
 // absent from this listing; per the comment above it presumably restricts
 // the check to non-CS profiles -- restore it from the original file.
675 if (const auto *CB = dyn_cast<CallBase>(&Inst))
676 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
677 return 0;
678
679 return getInstWeightImpl(Inst);
680}
681
682/// Get the FunctionSamples for a call instruction.
683///
684/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
685/// instance in which that call instruction is calling to. It contains
686/// all samples that resides in the inlined instance. We first find the
687/// inlined instance in which the call instruction is from, then we
688/// traverse its children to find the callsite with the matching
689/// location.
690///
691/// \param Inst Call/Invoke instruction to query.
692///
693/// \returns The FunctionSamples pointer to the inlined instance.
/// Returns nullptr when \p Inst has no debug location or when no
/// FunctionSamples is found for the instruction that contains the call.
694const FunctionSamples *
695SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
696 const DILocation *DIL = Inst.getDebugLoc();
697 if (!DIL) {
698 return nullptr;
699 }
700
 // CalleeName stays empty when the call has no statically known callee.
701 StringRef CalleeName;
702 if (Function *Callee = Inst.getCalledFunction())
703 CalleeName = Callee->getName();
704
 // NOTE(review): the condition guarding this return (listing line 705) is
 // absent from this listing; presumably it selects the context-sensitive
 // path in which ContextTracker is set -- restore it from the original
 // file.
706 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
707
708 const FunctionSamples *FS = findFunctionSamples(Inst);
709 if (FS == nullptr)
710 return nullptr;
711
 // Look up the callee's samples at this call site within the enclosing
 // samples, passing the reader's remapper and the fuzzy-matching name map.
712 return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
713 CalleeName, Reader->getRemapper(),
714 &FuncNameToProfNameMap);
715}
716
717/// Returns a vector of FunctionSamples that are the indirect call targets
718/// of \p Inst. The vector is sorted by the total number of samples. Stores
719/// the total call count of the indirect call in \p Sum.
///
/// The sort key is the head-sample estimate in descending order, with the
/// GUID in ascending order as tie-breaker. \p Sum is left untouched on the
/// early returns taken before it is reset to 0 (no debug location, no
/// callee samples in the context-tracker path, no enclosing samples).
720std::vector<const FunctionSamples *>
721SampleProfileLoader::findIndirectCallFunctionSamples(
722 const Instruction &Inst, uint64_t &Sum) const {
723 const DILocation *DIL = Inst.getDebugLoc();
724 std::vector<const FunctionSamples *> R;
725
726 if (!DIL) {
727 return R;
728 }
729
 // Note: the lambda's parameter R shadows the result vector R above; the
 // lambda captures nothing, so the two never interact.
730 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
731 assert(L && R && "Expect non-null FunctionSamples");
732 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
733 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
734 return L->getGUID() < R->getGUID();
735 };
736
 // NOTE(review): the line that opens this block (listing line 737) is absent
 // from this listing; presumably it is the condition selecting the
 // context-sensitive path -- restore it from the original file.
738 auto CalleeSamples =
739 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
740 if (CalleeSamples.empty())
741 return R;
742
743 // For CSSPGO, we only use target context profile's entry count
744 // as that already includes both inlined callee and non-inlined ones.
745 Sum = 0;
746 for (const auto *const FS : CalleeSamples) {
747 Sum += FS->getHeadSamplesEstimate();
748 R.push_back(FS);
749 }
750 llvm::sort(R, FSCompare);
751 return R;
752 }
753
754 const FunctionSamples *FS = findFunctionSamples(Inst);
755 if (FS == nullptr)
756 return R;
757
 // NOTE(review): the declaration of CallSite (listing line 758) is absent
 // from this listing -- restore it from the original file.
 // Sum accumulates the call-target counts recorded at the call site plus the
 // head-sample estimates of the samples nested at the call site.
759 Sum = 0;
760 if (auto T = FS->findCallTargetMapAt(CallSite))
761 for (const auto &T_C : *T)
762 Sum += T_C.second;
763 if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
764 if (M->empty())
765 return R;
766 for (const auto &NameFS : *M) {
767 Sum += NameFS.second.getHeadSamplesEstimate();
768 R.push_back(&NameFS.second);
769 }
770 llvm::sort(R, FSCompare);
771 }
772 return R;
773}
774
// Returns the FunctionSamples that \p Inst belongs to, caching the result per
// debug location in DILocation2SampleMap.
//
// An instruction without a debug location maps to Samples. For other
// instructions the first lookup for a given DILocation computes the answer
// (from ContextTracker or from Samples) and stores it; later lookups return
// the stored pointer, which may be nullptr if the lookup produced none.
775const FunctionSamples *
776SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
 // NOTE(review): the line that opens this block (listing line 777) is absent
 // from this listing; presumably it restricts the probe check to
 // probe-based profiles -- restore it from the original file.
778 std::optional<PseudoProbe> Probe = extractProbe(Inst);
779 if (!Probe)
780 return nullptr;
781 }
782
783 const DILocation *DIL = Inst.getDebugLoc();
784 if (!DIL)
785 return Samples;
786
 // try_emplace inserts a nullptr placeholder only when DIL is not cached yet;
 // it.second tells whether this call performed the insertion.
787 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
788 if (it.second) {
 // NOTE(review): the condition choosing between the two assignments below
 // (listing line 789) is absent from this listing -- restore it from the
 // original file.
790 it.first->second = ContextTracker->getContextSamplesFor(DIL);
791 else
792 it.first->second = Samples->findFunctionSamples(
793 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
794 }
795 return it.first->second;
796}
797
798/// Check whether the indirect call promotion history of \p Inst allows
799/// the promotion for \p Candidate.
800/// If the profile count for the promotion candidate \p Candidate is
801/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
802/// for \p Inst. If we already have at least MaxNumPromotions
803/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
804/// cannot promote for \p Inst anymore.
805static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
806 uint64_t TotalCount = 0;
807 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
808 MaxNumPromotions, TotalCount, true);
809 // No valid value profile so no promoted targets have been recorded
810 // before. Ok to do ICP.
811 if (ValueData.empty())
812 return true;
813
814 unsigned NumPromoted = 0;
815 for (const auto &V : ValueData) {
816 if (V.Count != NOMORE_ICP_MAGICNUM)
817 continue;
818
819 // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
820 // metadata, it means the candidate has been promoted for this
821 // indirect call.
822 if (V.Value == Function::getGUIDAssumingExternalLinkage(Candidate))
823 return false;
824 NumPromoted++;
825 // If already have MaxNumPromotions promotion, don't do it anymore.
826 if (NumPromoted == MaxNumPromotions)
827 return false;
828 }
829 return true;
830}
831
832/// Update indirect call target profile metadata for \p Inst.
833/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
834/// If it is 0, it means updateIDTMetaData is used to mark a
835/// certain target to be promoted already. If it is not zero,
836/// we expect to use it to update the total count in the value profile.
///
/// The merged targets are sorted by (count, value) in descending order and at
/// most MaxNumPromotions of them are written back through annotateValueSite.
/// Does nothing when MaxNumPromotions is zero.
837static void
// NOTE(review): the line carrying the function name and its first parameter
// (listing line 838) is absent from this listing; the body refers to that
// parameter as Inst -- restore it from the original file.
839 const SmallVectorImpl<InstrProfValueData> &CallTargets,
840 uint64_t Sum) {
841 // Bail out early if MaxNumPromotions is zero.
842 // This prevents allocating an array of zero length below.
843 //
844 // Note `updateIDTMetaData` is called in two places so check
845 // `MaxNumPromotions` inside it.
846 if (MaxNumPromotions == 0)
847 return;
848 // OldSum is the existing total count in the value profile data.
849 uint64_t OldSum = 0;
850 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
851 MaxNumPromotions, OldSum, true);
852
 // Target value -> count, merged from the existing metadata and CallTargets.
853 DenseMap<uint64_t, uint64_t> ValueCountMap;
854 if (Sum == 0) {
855 assert((CallTargets.size() == 1 &&
856 CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
857 "If sum is 0, assume only one element in CallTargets "
858 "with count being NOMORE_ICP_MAGICNUM");
859 // Initialize ValueCountMap with existing value profile data.
860 for (const auto &V : ValueData)
861 ValueCountMap[V.Value] = V.Count;
862 auto Pair =
863 ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
864 // If the target already exists in value profile, decrease the total
865 // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
866 if (!Pair.second) {
 // NOTE(review): unsigned subtraction; this assumes OldSum is at least the
 // target's existing count -- confirm.
867 OldSum -= Pair.first->second;
868 Pair.first->second = NOMORE_ICP_MAGICNUM;
869 }
870 Sum = OldSum;
871 } else {
872 // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
873 // counts in the value profile.
874 for (const auto &V : ValueData) {
875 if (V.Count == NOMORE_ICP_MAGICNUM)
876 ValueCountMap[V.Value] = V.Count;
877 }
878
879 for (const auto &Data : CallTargets) {
880 auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
881 if (Pair.second)
882 continue;
883 // The target represented by Data.Value has already been promoted.
884 // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
885 // Sum by Data.Count.
886 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
887 Sum -= Data.Count;
888 }
889 }
890
 // NOTE(review): the declaration of NewCallTargets (listing line 891) is
 // absent from this listing -- restore it from the original file.
892 for (const auto &ValueCount : ValueCountMap) {
893 NewCallTargets.emplace_back(
894 InstrProfValueData{ValueCount.first, ValueCount.second});
895 }
896
 // Descending by count, then by value, so the order does not depend on the
 // iteration order of ValueCountMap.
897 llvm::sort(NewCallTargets,
898 [](const InstrProfValueData &L, const InstrProfValueData &R) {
899 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
900 });
901
 // Never annotate more entries than exist or than MaxNumPromotions allows.
902 uint32_t MaxMDCount =
903 std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
904 annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
905 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
906}
907
908/// Attempt to promote indirect call and also inline the promoted call.
909///
910/// \param F Caller function.
911/// \param Candidate ICP and inline candidate.
912/// \param SumOrigin Original sum of target counts for indirect call before
913/// promoting given candidate.
914/// \param Sum Prorated sum of remaining target counts for indirect call
915/// after promoting given candidate.
916/// \param InlinedCallSite Output vector for new call sites exposed after
917/// inlining.
918bool SampleProfileLoader::tryPromoteAndInlineCandidate(
919 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
920 SmallVector<CallBase *, 8> *InlinedCallSite) {
921 // Bail out early if sample-loader inliner is disabled.
922 if (DisableSampleProfileInlining)
923 return false;
924
925 // Bail out early if MaxNumPromotions is zero.
926 // This prevents allocating an array of zero length in callees below.
927 if (MaxNumPromotions == 0)
928 return false;
929 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
930 auto R = SymbolMap.find(CalleeFunctionName);
931 if (R == SymbolMap.end() || !R->second)
932 return false;
933
934 auto &CI = *Candidate.CallInstr;
935 if (!doesHistoryAllowICP(CI, R->second->getName()))
936 return false;
937
938 const char *Reason = "Callee function not available";
939 // R->getValue() != &F is to prevent promoting a recursive call.
940 // If it is a recursive call, we do not inline it as it could bloat
941 // the code exponentially. There is way to better handle this, e.g.
942 // clone the caller first, and inline the cloned caller if it is
943 // recursive. As llvm does not inline recursive calls, we will
944 // simply ignore it instead of handling it explicitly.
945 if (!R->second->isDeclaration() && R->second->getSubprogram() &&
946 R->second->hasFnAttribute("use-sample-profile") &&
947 R->second != &F && isLegalToPromote(CI, R->second, &Reason)) {
948 // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
949 // in the value profile metadata so the target won't be promoted again.
950 SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
951 Function::getGUIDAssumingExternalLinkage(R->second->getName()),
953 updateIDTMetaData(CI, SortedCallTargets, 0);
954
955 auto *DI = &pgo::promoteIndirectCall(
956 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
957 if (DI) {
958 Sum -= Candidate.CallsiteCount;
959 // Do not prorate the indirect callsite distribution since the original
960 // distribution will be used to scale down non-promoted profile target
961 // counts later. By doing this we lose track of the real callsite count
962 // for the leftover indirect callsite as a trade off for accurate call
963 // target counts.
964 // TODO: Ideally we would have two separate factors, one for call site
965 // counts and one is used to prorate call target counts.
966 // Do not update the promoted direct callsite distribution at this
967 // point since the original distribution combined with the callee profile
968 // will be used to prorate callsites from the callee if inlined. Once not
969 // inlined, the direct callsite distribution should be prorated so that
970 // the it will reflect the real callsite counts.
971 Candidate.CallInstr = DI;
972 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
973 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
974 if (!Inlined) {
975 // Prorate the direct callsite distribution so that it reflects real
976 // callsite counts.
978 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
979 }
980 return Inlined;
981 }
982 }
983 } else {
984 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
986 Candidate.CallInstr->getName())<< " because "
987 << Reason << "\n");
988 }
989 return false;
990}
991
992bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
994 return false;
995
996 Function *Callee = CallInst.getCalledFunction();
997 if (Callee == nullptr)
998 return false;
999
1000 InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
1001 GetAC, GetTLI);
1002
1003 if (Cost.isNever())
1004 return false;
1005
1006 if (Cost.isAlways())
1007 return true;
1008
1009 return Cost.getCost() <= SampleColdCallSiteThreshold;
1010}
1011
1012void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1013 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1014 bool Hot) {
1015 for (auto *I : Candidates) {
1016 Function *CalledFunction = I->getCalledFunction();
1017 if (CalledFunction) {
1018 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1019 "InlineAttempt", I->getDebugLoc(),
1020 I->getParent())
1021 << "previous inlining reattempted for "
1022 << (Hot ? "hotness: '" : "size: '")
1023 << ore::NV("Callee", CalledFunction) << "' into '"
1024 << ore::NV("Caller", &F) << "'");
1025 }
1026 }
1027}
1028
1029void SampleProfileLoader::findExternalInlineCandidate(
1030 CallBase *CB, const FunctionSamples *Samples,
1031 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1032
1033 // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
1034 // function make sure it's imported
1035 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1036 // Samples may not exist for replayed function, if so
1037 // just add the direct GUID and move on
1038 if (!Samples) {
1039 InlinedGUIDs.insert(Function::getGUIDAssumingExternalLinkage(
1040 CB->getCalledFunction()->getName()));
1041 return;
1042 }
1043 // Otherwise, drop the threshold to import everything that we can
1044 Threshold = 0;
1045 }
1046
1047 // In some rare cases, call instruction could be changed after being pushed
1048 // into inline candidate queue, this is because earlier inlining may expose
1049 // constant propagation which can change indirect call to direct call. When
1050 // this happens, we may fail to find matching function samples for the
1051 // candidate later, even if a match was found when the candidate was enqueued.
1052 if (!Samples)
1053 return;
1054
1055 // For AutoFDO profile, retrieve candidate profiles by walking over
1056 // the nested inlinee profiles.
1058 // Set threshold to zero to honor pre-inliner decision.
1060 Threshold = 0;
1061 Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1062 return;
1063 }
1064
1065 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1066 std::queue<ContextTrieNode *> CalleeList;
1067 CalleeList.push(Caller);
1068 while (!CalleeList.empty()) {
1069 ContextTrieNode *Node = CalleeList.front();
1070 CalleeList.pop();
1071 FunctionSamples *CalleeSample = Node->getFunctionSamples();
1072 // For CSSPGO profile, retrieve candidate profile by walking over the
1073 // trie built for context profile. Note that also take call targets
1074 // even if callee doesn't have a corresponding context profile.
1075 if (!CalleeSample)
1076 continue;
1077
1078 // If pre-inliner decision is used, honor that for importing as well.
1079 bool PreInline =
1082 if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1083 continue;
1084
1085 Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
1086 // Add to the import list only when it's defined out of module.
1087 if (!Func || Func->isDeclaration())
1088 InlinedGUIDs.insert(CalleeSample->getGUID());
1089
1090 // Import hot CallTargets, which may not be available in IR because full
1091 // profile annotation cannot be done until backend compilation in ThinLTO.
1092 for (const auto &BS : CalleeSample->getBodySamples())
1093 for (const auto &TS : BS.second.getCallTargets())
1094 if (TS.second > Threshold) {
1095 const Function *Callee = SymbolMap.lookup(TS.first);
1096 if (!Callee || Callee->isDeclaration())
1097 InlinedGUIDs.insert(TS.first.getHashCode());
1098 }
1099
1100 // Import hot child context profile associted with callees. Note that this
1101 // may have some overlap with the call target loop above, but doing this
1102 // based child context profile again effectively allow us to use the max of
1103 // entry count and call target count to determine importing.
1104 for (auto &Child : Node->getAllChildContext()) {
1105 ContextTrieNode *CalleeNode = &Child.second;
1106 CalleeList.push(CalleeNode);
1107 }
1108 }
1109}
1110
1111/// Iteratively inline hot callsites of a function.
1112///
1113/// Iteratively traverse all callsites of the function \p F, so as to
1114/// find out callsites with corresponding inline instances.
1115///
1116/// For such callsites,
1117/// - If it is hot enough, inline the callsites and adds callsites of the callee
1118/// into the caller. If the call is an indirect call, first promote
1119/// it to direct call. Each indirect call is limited with a single target.
1120///
1121/// - If a callsite is not inlined, merge the its profile to the outline
1122/// version (if --sample-profile-merge-inlinee is true), or scale the
1123/// counters of standalone function based on the profile of inlined
1124/// instances (if --sample-profile-merge-inlinee is false).
1125///
1126/// Later passes may consume the updated profiles.
1127///
1128/// \param F function to perform iterative inlining.
1129/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1130/// inlined in the profiled binary.
1131///
1132/// \returns True if there is any inline happened.
1133bool SampleProfileLoader::inlineHotFunctions(
1134 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1135 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1136 // Profile symbol list is ignored when profile-sample-accurate is on.
1137 assert((!ProfAccForSymsInList ||
1139 !F.hasFnAttribute("profile-sample-accurate"))) &&
1140 "ProfAccForSymsInList should be false when profile-sample-accurate "
1141 "is enabled");
1142
1143 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1144 bool Changed = false;
1145 bool LocalChanged = true;
1146 while (LocalChanged) {
1147 LocalChanged = false;
1149 for (auto &BB : F) {
1150 bool Hot = false;
1151 SmallVector<CallBase *, 10> AllCandidates;
1152 SmallVector<CallBase *, 10> ColdCandidates;
1153 for (auto &I : BB) {
1154 const FunctionSamples *FS = nullptr;
1155 if (auto *CB = dyn_cast<CallBase>(&I)) {
1156 if (!isa<IntrinsicInst>(I)) {
1157 if ((FS = findCalleeFunctionSamples(*CB))) {
1158 assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1159 "GUIDToFuncNameMap has to be populated");
1160 AllCandidates.push_back(CB);
1161 if (FS->getHeadSamplesEstimate() > 0 ||
1163 LocalNotInlinedCallSites.insert({CB, FS});
1164 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1165 Hot = true;
1166 else if (shouldInlineColdCallee(*CB))
1167 ColdCandidates.push_back(CB);
1168 } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1169 AllCandidates.push_back(CB);
1170 }
1171 }
1172 }
1173 }
1174 if (Hot || ExternalInlineAdvisor) {
1175 CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1176 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1177 } else {
1178 CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1179 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1180 }
1181 }
1182 for (CallBase *I : CIS) {
1183 Function *CalledFunction = I->getCalledFunction();
1184 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1185 0 /* dummy count */,
1186 1.0 /* dummy distribution factor */};
1187 // Do not inline recursive calls.
1188 if (CalledFunction == &F)
1189 continue;
1190 if (I->isIndirectCall()) {
1191 uint64_t Sum;
1192 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1193 uint64_t SumOrigin = Sum;
1194 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1195 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1196 PSI->getOrCompHotCountThreshold());
1197 continue;
1198 }
1199 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1200 continue;
1201
1202 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1203 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1204 LocalNotInlinedCallSites.erase(I);
1205 LocalChanged = true;
1206 }
1207 }
1208 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1209 !CalledFunction->isDeclaration()) {
1210 if (tryInlineCandidate(Candidate)) {
1211 LocalNotInlinedCallSites.erase(I);
1212 LocalChanged = true;
1213 }
1214 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1215 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1216 InlinedGUIDs,
1217 PSI->getOrCompHotCountThreshold());
1218 }
1219 }
1220 Changed |= LocalChanged;
1221 }
1222
1223 // For CS profile, profile for not inlined context will be merged when
1224 // base profile is being retrieved.
1226 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1227 return Changed;
1228}
1229
1230bool SampleProfileLoader::tryInlineCandidate(
1231 InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1232 // Do not attempt to inline a candidate if
1233 // --disable-sample-loader-inlining is true.
1234 if (DisableSampleProfileInlining)
1235 return false;
1236
1237 CallBase &CB = *Candidate.CallInstr;
1238 Function *CalledFunction = CB.getCalledFunction();
1239 assert(CalledFunction && "Expect a callee with definition");
1240 DebugLoc DLoc = CB.getDebugLoc();
1241 BasicBlock *BB = CB.getParent();
1242
1243 InlineCost Cost = shouldInlineCandidate(Candidate);
1244 if (Cost.isNever()) {
1245 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1246 "InlineFail", DLoc, BB)
1247 << "incompatible inlining");
1248 return false;
1249 }
1250
1251 if (!Cost)
1252 return false;
1253
1254 InlineFunctionInfo IFI(GetAC);
1255 IFI.UpdateProfile = false;
1256 InlineResult IR = InlineFunction(CB, IFI,
1257 /*MergeAttributes=*/true);
1258 if (!IR.isSuccess())
1259 return false;
1260
1261 // The call to InlineFunction erases I, so we can't pass it here.
1262 emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1263 Cost, true, getAnnotatedRemarkPassName());
1264
1265 // Now populate the list of newly exposed call sites.
1266 if (InlinedCallSites) {
1267 InlinedCallSites->clear();
1268 llvm::append_range(*InlinedCallSites, IFI.InlinedCallSites);
1269 }
1270
1272 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1273 ++NumCSInlined;
1274
1275 // Prorate inlined probes for a duplicated inlining callsite which probably
1276 // has a distribution less than 100%. Samples for an inlinee should be
1277 // distributed among the copies of the original callsite based on each
1278 // callsite's distribution factor for counts accuracy. Note that an inlined
1279 // probe may come with its own distribution factor if it has been duplicated
1280 // in the inlinee body. The two factor are multiplied to reflect the
1281 // aggregation of duplication.
1282 if (Candidate.CallsiteDistribution < 1) {
1283 for (auto &I : IFI.InlinedCallSites) {
1284 if (std::optional<PseudoProbe> Probe = extractProbe(*I))
1285 setProbeDistributionFactor(*I, Probe->Factor *
1286 Candidate.CallsiteDistribution);
1287 }
1288 NumDuplicatedInlinesite++;
1289 }
1290
1291 return true;
1292}
1293
1294bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1295 CallBase *CB) {
1296 assert(CB && "Expect non-null call instruction");
1297
1298 if (isa<IntrinsicInst>(CB))
1299 return false;
1300
1301 // Find the callee's profile. For indirect call, find hottest target profile.
1302 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1303 // If ExternalInlineAdvisor wants to inline this site, do so even
1304 // if Samples are not present.
1305 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1306 return false;
1307
1308 float Factor = 1.0;
1309 if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
1310 Factor = Probe->Factor;
1311
1312 uint64_t CallsiteCount =
1313 CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1314 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1315 return true;
1316}
1317
1318std::optional<InlineCost>
1319SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1320 std::unique_ptr<InlineAdvice> Advice = nullptr;
1321 if (ExternalInlineAdvisor) {
1322 Advice = ExternalInlineAdvisor->getAdvice(CB);
1323 if (Advice) {
1324 if (!Advice->isInliningRecommended()) {
1325 Advice->recordUnattemptedInlining();
1326 return InlineCost::getNever("not previously inlined");
1327 }
1328 Advice->recordInlining();
1329 return InlineCost::getAlways("previously inlined");
1330 }
1331 }
1332
1333 return {};
1334}
1335
1336bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1337 std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1338 return Cost ? !!*Cost : false;
1339}
1340
1341InlineCost
1342SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1343 if (std::optional<InlineCost> ReplayCost =
1344 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1345 return *ReplayCost;
1346 // Adjust threshold based on call site hotness, only do this for callsite
1347 // prioritized inliner because otherwise cost-benefit check is done earlier.
1348 int SampleThreshold = SampleColdCallSiteThreshold;
1350 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1351 SampleThreshold = SampleHotCallSiteThreshold;
1352 else if (!ProfileSizeInline)
1353 return InlineCost::getNever("cold callsite");
1354 }
1355
1356 Function *Callee = Candidate.CallInstr->getCalledFunction();
1357 assert(Callee && "Expect a definition for inline candidate of direct call");
1358
1359 InlineParams Params = getInlineParams();
1360 // We will ignore the threshold from inline cost, so always get full cost.
1361 Params.ComputeFullInlineCost = true;
1363 // Checks if there is anything in the reachable portion of the callee at
1364 // this callsite that makes this inlining potentially illegal. Need to
1365 // set ComputeFullInlineCost, otherwise getInlineCost may return early
1366 // when cost exceeds threshold without checking all IRs in the callee.
1367 // The acutal cost does not matter because we only checks isNever() to
1368 // see if it is legal to inline the callsite.
1369 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1370 GetTTI(*Callee), GetAC, GetTLI);
1371
1372 // Honor always inline and never inline from call analyzer
1373 if (Cost.isNever() || Cost.isAlways())
1374 return Cost;
1375
1376 // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1377 // decisions based on hotness as well as accurate function byte sizes for
1378 // given context using function/inlinee sizes from previous build. It
1379 // stores the decision in profile, and also adjust/merge context profile
1380 // aiming at better context-sensitive post-inline profile quality, assuming
1381 // all inline decision estimates are going to be honored by compiler. Here
1382 // we replay that inline decision under `sample-profile-use-preinliner`.
1383 // Note that we don't need to handle negative decision from preinliner as
1384 // context profile for not inlined calls are merged by preinliner already.
1385 if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1386 // Once two node are merged due to promotion, we're losing some context
1387 // so the original context-sensitive preinliner decision should be ignored
1388 // for SyntheticContext.
1389 SampleContext &Context = Candidate.CalleeSamples->getContext();
1390 if (!Context.hasState(SyntheticContext) &&
1391 Context.hasAttribute(ContextShouldBeInlined))
1392 return InlineCost::getAlways("preinliner");
1393 }
1394
1395 // For old FDO inliner, we inline the call site if it is below hot threshold,
1396 // even if the function is hot based on sample profile data. This is to
1397 // prevent huge functions from being inlined.
1400 }
1401
1402 // Otherwise only use the cost from call analyzer, but overwite threshold with
1403 // Sample PGO threshold.
1404 return InlineCost::get(Cost.getCost(), SampleThreshold);
1405}
1406
1407bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1408 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1409 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1410 // Profile symbol list is ignored when profile-sample-accurate is on.
1411 assert((!ProfAccForSymsInList ||
1413 !F.hasFnAttribute("profile-sample-accurate"))) &&
1414 "ProfAccForSymsInList should be false when profile-sample-accurate "
1415 "is enabled");
1416
1417 // Populating worklist with initial call sites from root inliner, along
1418 // with call site weights.
1419 CandidateQueue CQueue;
1420 InlineCandidate NewCandidate;
1421 for (auto &BB : F) {
1422 for (auto &I : BB) {
1423 auto *CB = dyn_cast<CallBase>(&I);
1424 if (!CB)
1425 continue;
1426 if (getInlineCandidate(&NewCandidate, CB))
1427 CQueue.push(NewCandidate);
1428 }
1429 }
1430
1431 // Cap the size growth from profile guided inlining. This is needed even
1432 // though cost of each inline candidate already accounts for callee size,
1433 // because with top-down inlining, we can grow inliner size significantly
1434 // with large number of smaller inlinees each pass the cost check.
1436 "Max inline size limit should not be smaller than min inline size "
1437 "limit.");
1438 unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1439 SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
1440 SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
1441 if (ExternalInlineAdvisor)
1442 SizeLimit = std::numeric_limits<unsigned>::max();
1443
1444 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1445
1446 // Perform iterative BFS call site prioritized inlining
1447 bool Changed = false;
1448 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1449 InlineCandidate Candidate = CQueue.top();
1450 CQueue.pop();
1451 CallBase *I = Candidate.CallInstr;
1452 Function *CalledFunction = I->getCalledFunction();
1453
1454 if (CalledFunction == &F)
1455 continue;
1456 if (I->isIndirectCall()) {
1457 uint64_t Sum = 0;
1458 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1459 uint64_t SumOrigin = Sum;
1460 Sum *= Candidate.CallsiteDistribution;
1461 unsigned ICPCount = 0;
1462 for (const auto *FS : CalleeSamples) {
1463 // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1464 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1465 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1466 PSI->getOrCompHotCountThreshold());
1467 continue;
1468 }
1469 uint64_t EntryCountDistributed =
1470 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1471 // In addition to regular inline cost check, we also need to make sure
1472 // ICP isn't introducing excessive speculative checks even if individual
1473 // target looks beneficial to promote and inline. That means we should
1474 // only do ICP when there's a small number dominant targets.
1475 if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1476 EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1477 break;
1478 // TODO: Fix CallAnalyzer to handle all indirect calls.
1479 // For indirect call, we don't run CallAnalyzer to get InlineCost
1480 // before actual inlining. This is because we could see two different
1481 // types from the same definition, which makes CallAnalyzer choke as
1482 // it's expecting matching parameter type on both caller and callee
1483 // side. See example from PR18962 for the triggering cases (the bug was
1484 // fixed, but we generate different types).
1485 if (!PSI->isHotCount(EntryCountDistributed))
1486 break;
1487 SmallVector<CallBase *, 8> InlinedCallSites;
1488 // Attach function profile for promoted indirect callee, and update
1489 // call site count for the promoted inline candidate too.
1490 Candidate = {I, FS, EntryCountDistributed,
1491 Candidate.CallsiteDistribution};
1492 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1493 &InlinedCallSites)) {
1494 for (auto *CB : InlinedCallSites) {
1495 if (getInlineCandidate(&NewCandidate, CB))
1496 CQueue.emplace(NewCandidate);
1497 }
1498 ICPCount++;
1499 Changed = true;
1500 } else if (!ContextTracker) {
1501 LocalNotInlinedCallSites.insert({I, FS});
1502 }
1503 }
1504 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1505 !CalledFunction->isDeclaration()) {
1506 SmallVector<CallBase *, 8> InlinedCallSites;
1507 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1508 for (auto *CB : InlinedCallSites) {
1509 if (getInlineCandidate(&NewCandidate, CB))
1510 CQueue.emplace(NewCandidate);
1511 }
1512 Changed = true;
1513 } else if (!ContextTracker) {
1514 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1515 }
1516 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1517 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1518 InlinedGUIDs,
1519 PSI->getOrCompHotCountThreshold());
1520 }
1521 }
1522
1523 if (!CQueue.empty()) {
1524 if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1525 ++NumCSInlinedHitMaxLimit;
1526 else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1527 ++NumCSInlinedHitMinLimit;
1528 else
1529 ++NumCSInlinedHitGrowthLimit;
1530 }
1531
1532 // For CS profile, profile for not inlined context will be merged when
1533 // base profile is being retrieved.
1535 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1536 return Changed;
1537}
1538
1539void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1540 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1541 const Function &F) {
1542 // Accumulate not inlined callsite information into notInlinedSamples
1543 for (const auto &Pair : NonInlinedCallSites) {
1544 CallBase *I = Pair.first;
1545 Function *Callee = I->getCalledFunction();
1546 if (!Callee || Callee->isDeclaration())
1547 continue;
1548
1549 ORE->emit(
1550 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1551 I->getDebugLoc(), I->getParent())
1552 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1553 << "' into '" << ore::NV("Caller", &F) << "'");
1554
1555 ++NumCSNotInlined;
1556 const FunctionSamples *FS = Pair.second;
1557 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1558 continue;
1559 }
1560
1561 // Do not merge a context that is already duplicated into the base profile.
1562 if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1563 continue;
1564
1565 if (ProfileMergeInlinee) {
1566 // A function call can be replicated by optimizations like callsite
1567 // splitting or jump threading and the replicates end up sharing the
1568 // sample nested callee profile instead of slicing the original
1569 // inlinee's profile. We want to do merge exactly once by filtering out
1570 // callee profiles with a non-zero head sample count.
1571 if (FS->getHeadSamples() == 0) {
1572 // Use entry samples as head samples during the merge, as inlinees
1573 // don't have head samples.
1574 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1575 FS->getHeadSamplesEstimate());
1576
1577 // Note that we have to do the merge right after processing function.
1578 // This allows OutlineFS's profile to be used for annotation during
1579 // top-down processing of functions' annotation.
1580 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1581 // If outlined function does not exist in the profile, add it to a
1582 // separate map so that it does not rehash the original profile.
1583 if (!OutlineFS)
1584 OutlineFS = &OutlineFunctionSamples[
1585 FunctionId(FunctionSamples::getCanonicalFnName(Callee->getName()))];
1586 OutlineFS->merge(*FS, 1);
1587 // Set outlined profile to be synthetic to not bias the inliner.
1588 OutlineFS->setContextSynthetic();
1589 }
1590 } else {
1591 auto pair =
1592 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1593 pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1594 }
1595 }
1596}
1597
1598/// Returns the sorted CallTargetMap \p M by count in descending order.
1602 for (const auto &I : SampleRecord::sortCallTargets(M)) {
1603 R.emplace_back(
1604 InstrProfValueData{I.first.getHashCode(), I.second});
1605 }
1606 return R;
1607}
1608
1609// Generate MD_prof metadata for every branch instruction using the
1610// edge weights computed during propagation.
1611void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1612 // Generate MD_prof metadata for every branch instruction using the
1613 // edge weights computed during propagation.
1614 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1615 LLVMContext &Ctx = F.getContext();
1616 MDBuilder MDB(Ctx);
1617 for (auto &BI : F) {
1618 BasicBlock *BB = &BI;
1619
1620 if (BlockWeights[BB]) {
1621 for (auto &I : *BB) {
1622 if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1623 continue;
1625 const DebugLoc &DLoc = I.getDebugLoc();
1626 if (!DLoc)
1627 continue;
1628 const DILocation *DIL = DLoc;
1629 const FunctionSamples *FS = findFunctionSamples(I);
1630 if (!FS)
1631 continue;
1633 ErrorOr<SampleRecord::CallTargetMap> T =
1634 FS->findCallTargetMapAt(CallSite);
1635 if (!T || T.get().empty())
1636 continue;
1638 // Prorate the callsite counts based on the pre-ICP distribution
1639 // factor to reflect what is already done to the callsite before
1640 // ICP, such as calliste cloning.
1641 if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
1642 if (Probe->Factor < 1)
1643 T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1644 }
1645 }
1646 SmallVector<InstrProfValueData, 2> SortedCallTargets =
1648 uint64_t Sum = 0;
1649 for (const auto &C : T.get())
1650 Sum += C.second;
1651 // With CSSPGO all indirect call targets are counted torwards the
1652 // original indirect call site in the profile, including both
1653 // inlined and non-inlined targets.
1655 if (const FunctionSamplesMap *M =
1656 FS->findFunctionSamplesMapAt(CallSite)) {
1657 for (const auto &NameFS : *M)
1658 Sum += NameFS.second.getHeadSamplesEstimate();
1659 }
1660 }
1661 if (Sum)
1662 updateIDTMetaData(I, SortedCallTargets, Sum);
1663 else if (OverwriteExistingWeights)
1664 I.setMetadata(LLVMContext::MD_prof, nullptr);
1665 } else if (!isa<IntrinsicInst>(&I)) {
1667 I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])},
1668 /*IsExpected=*/false);
1669 }
1670 }
1672 // Set profile metadata (possibly annotated by LTO prelink) to zero or
1673 // clear it for cold code.
1674 for (auto &I : *BB) {
1675 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1677 I.setMetadata(LLVMContext::MD_prof, nullptr);
1678 } else {
1679 setBranchWeights(I, ArrayRef<uint32_t>{uint32_t(0)},
1680 /*IsExpected=*/false);
1681 }
1682 }
1683 }
1684 }
1685
1686 Instruction *TI = BB->getTerminator();
1687 if (TI->getNumSuccessors() == 1)
1688 continue;
1689 if (!isa<CondBrInst>(TI) && !isa<SwitchInst>(TI) &&
1691 continue;
1692
1693 DebugLoc BranchLoc = TI->getDebugLoc();
1694 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1695 << ((BranchLoc) ? Twine(BranchLoc.getLine())
1696 : Twine("<UNKNOWN LOCATION>"))
1697 << ".\n");
1698 SmallVector<uint32_t, 4> Weights;
1699 uint32_t MaxWeight = 0;
1700 Instruction *MaxDestInst;
1701 // Since profi treats multiple edges (multiway branches) as a single edge,
1702 // we need to distribute the computed weight among the branches. We do
1703 // this by evenly splitting the edge weight among destinations.
1704 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1705 std::vector<uint64_t> EdgeIndex;
1707 EdgeIndex.resize(TI->getNumSuccessors());
1708 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1709 const BasicBlock *Succ = TI->getSuccessor(I);
1710 EdgeIndex[I] = EdgeMultiplicity[Succ];
1711 EdgeMultiplicity[Succ]++;
1712 }
1713 }
1714 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1715 BasicBlock *Succ = TI->getSuccessor(I);
1716 Edge E = std::make_pair(BB, Succ);
1717 uint64_t Weight = EdgeWeights[E];
1718 LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1719 // Use uint32_t saturated arithmetic to adjust the incoming weights,
1720 // if needed. Sample counts in profiles are 64-bit unsigned values,
1721 // but internally branch weights are expressed as 32-bit values.
1722 if (Weight > std::numeric_limits<uint32_t>::max()) {
1723 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
1724 Weight = std::numeric_limits<uint32_t>::max();
1725 }
1726 if (!SampleProfileUseProfi) {
1727 // Weight is added by one to avoid propagation errors introduced by
1728 // 0 weights.
1729 Weights.push_back(static_cast<uint32_t>(
1730 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1731 : Weight + 1));
1732 } else {
1733 // Profi creates proper weights that do not require "+1" adjustments but
1734 // we evenly split the weight among branches with the same destination.
1735 uint64_t W = Weight / EdgeMultiplicity[Succ];
1736 // Rounding up, if needed, so that first branches are hotter.
1737 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1738 W++;
1739 Weights.push_back(static_cast<uint32_t>(W));
1740 }
1741 if (Weight != 0) {
1742 if (Weight > MaxWeight) {
1743 MaxWeight = Weight;
1744 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1745 }
1746 }
1747 }
1748
1749 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1750
1751 uint64_t TempWeight;
1752 // Only set weights if there is at least one non-zero weight.
1753 // In any other case, let the analyzer set weights.
1754 // Do not set weights if the weights are present unless under
1755 // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1756 // twice. If the first annotation already set the weights, the second pass
1757 // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1758 // weight should have their existing metadata (possibly annotated by LTO
1759 // prelink) cleared.
1760 if (MaxWeight > 0 &&
1761 (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1762 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1763 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
1764 ORE->emit([&]() {
1765 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1766 << "most popular destination for conditional branches at "
1767 << ore::NV("CondBranchesLoc", BranchLoc);
1768 });
1769 } else {
1771 TI->setMetadata(LLVMContext::MD_prof, nullptr);
1772 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1773 } else {
1774 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1775 }
1776 }
1777 }
1778}
1779
1780/// Once all the branch weights are computed, we emit the MD_prof
1781/// metadata on BB using the computed values for each of its branches.
1782///
/// In this body that means: check that the profile is usable for \p F, run
/// sample-profile inlining (inlineHotFunctions or
/// inlineHotFunctionsWithPriority), call computeAndPropagateWeights, and, if
/// any of those steps reported a change, call generateMDProfMetadata. Coverage
/// remarks are emitted on every path that reaches the end of the function.
///
1783/// \param F The function to query.
1784///
1785/// \returns true if \p F was modified. Returns false, otherwise.
1786bool SampleProfileLoader::emitAnnotations(Function &F) {
1787 bool Changed = false;
1788
// NOTE(review): listing line 1789 is missing from this extract. The
// `} else {` at 1806 closes a block opened there, so it is presumably the
// `if` that selects the probe-based path -- confirm against upstream.
1790 LLVM_DEBUG({
1791 if (!ProbeManager->getDesc(F))
1792 dbgs() << "Probe descriptor missing for Function " << F.getName()
1793 << "\n";
1794 });
1795
// Count matched vs. mismatched profiles as reported by the probe manager.
1796 if (ProbeManager->profileIsValid(F, *Samples)) {
1797 ++NumMatchedProfile;
1798 } else {
1799 ++NumMismatchedProfile;
1800 LLVM_DEBUG(
1801 dbgs() << "Profile is invalid due to CFG mismatch for Function "
1802 << F.getName() << "\n");
// NOTE(review): listing line 1803 is missing; the `return false` below is
// presumably guarded by a condition on that line -- confirm against upstream.
1804 return false;
1805 }
1806 } else {
// On this path, a function location of 0 means there is nothing to annotate.
1807 if (getFunctionLoc(F) == 0)
1808 return false;
1809
1810 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1811 << F.getName() << ": " << getFunctionLoc(F) << "\n");
1812 }
1813
// InlinedGUIDs is filled in by whichever inliner runs and is then handed to
// computeAndPropagateWeights.
1814 DenseSet<GlobalValue::GUID> InlinedGUIDs;
// NOTE(review): listing line 1815 is missing; it presumably holds the `if`
// that chooses the priority-based inliner over the default one below.
1816 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1817 else
1818 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1819
1820 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1821
// Metadata is only generated when inlining or weight propagation changed F.
1822 if (Changed)
1823 generateMDProfMetadata(F);
1824
1825 emitCoverageRemarks(F);
1826 return Changed;
1827}
1828
/// Build a ProfiledCallGraph for module \p M, constructed either from the
/// ContextTracker or from the profiles held by the Reader, and then add the
/// module's functions to it.
///
/// \returns the newly created graph; ownership passes to the caller.
1829std::unique_ptr<ProfiledCallGraph>
1830SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1831 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
// NOTE(review): listing line 1832 is missing; it presumably holds the `if`
// that picks the ContextTracker-based graph over the Reader-based one.
1833 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1834 else
1835 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1836
1837 // Add all functions into the profiled call graph even if they are not in
1838 // the profile. This makes sure functions missing from the profile still
1839 // get a chance to be processed.
1840 for (Function &F : M) {
// NOTE(review): listing lines 1841 and 1844 are missing: the condition for
// the `continue` below and the argument of addProfiledFunction are not
// visible in this extract.
1842 continue;
1843 ProfiledCG->addProfiledFunction(
1845 }
1846
1847 return ProfiledCG;
1848}
1849
/// Compute the order in which the functions of \p M are processed.
///
/// When ProfileTopDownLoad is off, functions are appended in module iteration
/// order (and ProfileMergeInlinee is switched off). Otherwise the order comes
/// either from the SCCs of the profiled call graph, reversed after collection,
/// or from buildTopDownFuncOrder on \p CG.
///
/// \returns the list of functions to process, in processing order.
1850std::vector<Function *>
1851SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1852 std::vector<Function *> FunctionOrderList;
1853 FunctionOrderList.reserve(M.size());
1854
// NOTE(review): listing line 1855 is missing; it presumably holds the
// condition under which the warning below is printed.
1856 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1857 "together with -sample-profile-top-down-load.\n";
1858
1859 if (!ProfileTopDownLoad) {
1860 if (ProfileMergeInlinee) {
1861 // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1862 // because the profile for a function may be used for the profile
1863 // annotation of its outline copy before the profile merging of its
1864 // non-inlined inline instances, and that is not how
1865 // ProfileMergeInlinee is supposed to work.
1866 ProfileMergeInlinee = false;
1867 }
1868
1869 for (Function &F : M)
// NOTE(review): listing line 1870 is missing; presumably a filter on F
// (compare skipProfileForFunction at 1936) -- confirm against upstream.
1871 FunctionOrderList.push_back(&F);
1872 return FunctionOrderList;
1873 }
1874
// NOTE(review): listing lines 1875-1876 are missing; they presumably open the
// `if` whose `} else` is at 1942.
1877 // Use profiled call edges to augment the top-down order. There are cases
1878 // that the top-down order computed based on the static call graph doesn't
1879 // reflect real execution order. For example
1880 //
1881 // 1. Incomplete static call graph due to unknown indirect call targets.
1882 // Adjusting the order by considering indirect call edges from the
1883 // profile can enable the inlining of indirect call targets by allowing
1884 // the caller processed before them.
1885 // 2. Mutual call edges in an SCC. The static processing order computed for
1886 // an SCC may not reflect the call contexts in the context-sensitive
1887 // profile, thus may cause potential inlining to be overlooked. The
1888 // function order in one SCC is being adjusted to a top-down order based
1889 // on the profile to favor more inlining. This is only a problem with CS
1890 // profile.
1891 // 3. Transitive indirect call edges due to inlining. When a callee function
1892 // (say B) is inlined into a caller function (say A) in LTO prelink,
1893 // every call edge originated from the callee B will be transferred to
1894 // the caller A. If any transferred edge (say A->C) is indirect, the
1895 // original profiled indirect edge B->C, even if considered, would not
1896 // enforce a top-down order from the caller A to the potential indirect
1897 // call target C in LTO postlink since the inlined callee B is gone from
1898 // the static call graph.
1899 // 4. #3 can happen even for direct call targets, due to functions defined
1900 // in header files. A header function (say A), when included into source
1901 // files, is defined multiple times but only one definition survives due
1902 // to ODR. Therefore, the LTO prelink inlining done on those dropped
1903 // definitions can be useless based on a local file scope. More
1904 // importantly, the inlinee (say B), once fully inlined to a
1905 // to-be-dropped A, will have no profile to consume when its outlined
1906 // version is compiled. This can lead to a profile-less prelink
1907 // compilation for the outlined version of B which may be called from
1908 // external modules. While this isn't easy to fix, we rely on the
1909 // postlink AutoFDO pipeline to optimize B. Since the surviving copy of
1910 // A can be inlined in its local scope in prelink, it may not exist
1911 // in the merged IR in postlink, and we'll need the profiled call edges
1912 // to enforce a top-down order for the rest of the functions.
1913 //
1914 // Considering those cases, a profiled call graph completely independent of
1915 // the static call graph is constructed based on profile data, where
1916 // function objects are not even needed to handle case #3 and case #4.
1917 //
1918 // Note that static callgraph edges are completely ignored since they
1919 // can be conflicting with profiled edges for cyclic SCCs and may result in
1920 // an SCC order incompatible with profile-defined one. Using strictly
1921 // profile order ensures a maximum inlining experience. On the other hand,
1922 // static call edges are not so important when they don't correspond to a
1923 // context in the profile.
1924
1925 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1926 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1927 while (!CGI.isAtEnd()) {
1928 auto Range = *CGI;
1929 if (SortProfiledSCC) {
1930 // Sort nodes in one SCC based on callsite hotness.
1931 scc_member_iterator<ProfiledCallGraph *> SI(*CGI);
1932 Range = *SI;
1933 }
// Only nodes that resolve to a function through SymbolMap, and that are not
// skipped by skipProfileForFunction, are added to the order.
1934 for (auto *Node : Range) {
1935 Function *F = SymbolMap.lookup(Node->Name);
1936 if (F && !skipProfileForFunction(*F))
1937 FunctionOrderList.push_back(F);
1938 }
1939 ++CGI;
1940 }
// The list was filled in SCC-iteration order; flip it to obtain the final
// processing order.
1941 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1942 } else
1943 buildTopDownFuncOrder(CG, FunctionOrderList);
1944
1945 LLVM_DEBUG({
1946 dbgs() << "Function processing order:\n";
1947 for (auto F : FunctionOrderList) {
1948 dbgs() << F->getName() << "\n";
1949 }
1950 });
1951
1952 return FunctionOrderList;
1953}
1954
/// Create the profile reader, read the profile for module \p M and set up the
/// loader state that depends on the kind of profile that was read.
///
/// \returns false if the reader cannot be created, if reading the profile
/// fails, or if a probe-based profile is used on a module that is not probed
/// (a diagnostic is emitted in each case); true otherwise.
///
/// NOTE(review): this listing has many gaps in this function. Listing line
/// 1956 (the rest of the parameter list; the body uses a pointer named FAM) and
/// several guard conditions further down are not visible -- confirm the details
/// against upstream.
1955bool SampleProfileLoader::doInitialization(Module &M,
1957 auto &Ctx = M.getContext();
1958
1959 auto ReaderOrErr = SampleProfileReader::create(
1960 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1961 if (std::error_code EC = ReaderOrErr.getError()) {
1962 std::string Msg = "Could not open profile: " + EC.message();
1963 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1964 return false;
1965 }
1966 Reader = std::move(ReaderOrErr.get());
1967 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1968 // set module before reading the profile so reader may be able to only
1969 // read the function profiles which are used by the current module.
1970 Reader->setModule(&M);
1971 if (std::error_code EC = Reader->read()) {
1972 std::string Msg = "profile reading failed: " + EC.message();
1973 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1974 return false;
1975 }
1976
1977 PSL = Reader->getProfileSymbolList();
1978
// NOTE(review): listing line 1979 is missing; the assignment below is
// presumably conditional -- confirm against upstream.
1980 DisableSampleProfileInlining = DisableSampleLoaderInlining;
1981
1982 if (UseFlattenedProfile)
1983 ProfileConverter::flattenProfile(Reader->getProfiles(),
1984 Reader->profileIsCS());
1985
1986 // While profile-sample-accurate is on, ignore symbol list.
// NOTE(review): listing line 1988 (the right-hand side of this assignment) is
// missing from the extract.
1987 ProfAccForSymsInList =
1989 if (ProfAccForSymsInList) {
1990 NamesInProfile.clear();
1991 GUIDsInProfile.clear();
1992 auto NameTable = Reader->getNameTable();
// NOTE(review): listing line 1993 is missing; it presumably holds the `if`
// that selects between recording hash codes and recording names.
1994 for (FunctionId Name : NameTable)
1995 GUIDsInProfile.insert(Name.getHashCode());
1996 } else {
1997 for (FunctionId Name : NameTable)
1998 NamesInProfile.insert(Name.stringRef());
1999 }
2000 CoverageTracker.setProfAccForSymsInList(true);
2001 }
2002
// Inline replay is only set up when an analysis manager was supplied and a
// replay file was named.
2003 if (FAM && !ProfileInlineReplayFile.empty()) {
2004 ExternalInlineAdvisor = getReplayInlineAdvisor(
2005 M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2006 ReplayInlinerSettings{ProfileInlineReplayFile,
2010 /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2011 }
2012
2013 // Apply tweaks if context-sensitive or probe-based profile is available.
2014 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2015 Reader->profileIsProbeBased()) {
// NOTE(review): the listing omits lines 2016-2018, 2020-2021, 2023, 2025-2026,
// 2028 and 2032 in this region. Presumably each assignment below is guarded so
// that an explicitly given command-line value is kept (compare the
// getNumOccurrences() checks at 2053 and 2055) -- confirm against upstream.
2019 SampleProfileUseProfi = true;
2022 // Enable priority-based inliner and size inline by default for CSSPGO.
2024 ProfileSizeInline = true;
2027 // For CSSPGO, we also allow recursive inline to best use context profile.
2029 AllowRecursiveInline = true;
2030
2031 if (Reader->profileIsPreInlined()) {
2033 UsePreInlinerDecision = true;
2034 }
2035
2036 // Enable stale profile matching by default for probe-based profile.
2037 // Currently the matching relies on if the checksum mismatch is detected,
2038 // which is currently only available for pseudo-probe mode. Removing the
2039 // checksum check could cause regressions for some cases, so further tuning
2040 // might be needed if we want to enable it for all cases.
2041 if (Reader->profileIsProbeBased()) {
// NOTE(review): listing lines 2042 and 2044 are missing; presumably guards for
// the two assignments below.
2043 SalvageStaleProfile = true;
2045 SalvageUnusedProfile = true;
2046 }
2047
2048 if (!Reader->profileIsCS()) {
2049 // Non-CS profile should be fine without a function size budget for the
2050 // inliner since the contexts in the profile are either all from inlining
2051 // in the previous build or pre-computed by the preinliner with a size
2052 // cap, thus they are bounded.
2053 if (!ProfileInlineLimitMin.getNumOccurrences())
2054 ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
2055 if (!ProfileInlineLimitMax.getNumOccurrences())
2056 ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
2057 }
2058 }
2059
2060 if (Reader->profileIsCS()) {
2061 // Tracker for profiles under different context
2062 ContextTracker = std::make_unique<SampleContextTracker>(
2063 Reader->getProfiles(), &GUIDToFuncNameMap);
2064 }
2065
2066 // Load pseudo probe descriptors for probe-based function samples.
2067 if (Reader->profileIsProbeBased()) {
2068 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2069 if (!ProbeManager->moduleIsProbed(M)) {
2070 const char *Msg =
2071 "Pseudo-probe-based profile requires SampleProfileProbePass";
2072 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2073 DS_Warning));
2074 return false;
2075 }
2076 }
2077
// NOTE(review): listing lines 2078-2079 are missing; they presumably hold the
// condition under which the profile matcher below is created.
2080 MatchingManager = std::make_unique<SampleProfileMatcher>(
2081 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2082 FuncNameToProfNameMap);
2083 }
2084
2085 return true;
2086}
2087
2088// Note that this is a module-level check. Even if one module is errored out,
2089// the entire build will be errored out. However, the user could make big
2090// changes to functions in single module but those changes might not be
2091// performance significant to the whole binary. Therefore, to avoid those false
2092// positives, we select a reasonably big set of hot functions that are supposed
2093// to be globally performance significant, only compute and check the mismatch
2094// within those functions. The function selection is based on two criteria:
2095// 1) The function is hot enough, which is tuned by a hotness-based
2096// flag(HotFuncCutoffForStalenessError). 2) The number of functions is large
2097// enough, which is tuned by the MinfuncsForStalenessError flag.
//
// Returns true (after emitting a diagnostic) when the share of selected
// functions with a hash mismatch reaches PrecentMismatchForStalenessError
// percent; returns false otherwise, including when too few functions were
// selected.
2098bool SampleProfileLoader::rejectHighStalenessProfile(
2099 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
// NOTE(review): listing line 2100 is missing; the string below is presumably
// the message of an assert that opens there.
2101 "Only support for probe-based profile");
2102 uint64_t TotalHotFunc = 0;
2103 uint64_t NumMismatchedFunc = 0;
2104 for (const auto &I : Profiles) {
2105 const auto &FS = I.second;
// Profiles without a probe descriptor are left out of the statistics.
2106 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
2107 if (!FuncDesc)
2108 continue;
2109
2110 // Use a hotness-based threshold to control the function selection.
// NOTE(review): listing line 2111 (the start of this condition) is missing.
2112 FS.getTotalSamples()))
2113 continue;
2114
2115 TotalHotFunc++;
// A hash mismatch is not counted when the probe comes from a weak symbol.
2116 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS) &&
2117 !ProbeManager->probeFromWeakSymbol(FS.getGUID()))
2118 NumMismatchedFunc++;
2119 }
2120 // Make sure that the num of selected function is not too small to distinguish
2121 // from the user's benign changes.
2122 if (TotalHotFunc < MinfuncsForStalenessError)
2123 return false;
2124
2125 // Finally check the mismatch percentage against the threshold.
2126 if (NumMismatchedFunc * 100 >=
2127 TotalHotFunc * PrecentMismatchForStalenessError) {
2128 auto &Ctx = M.getContext();
2129 const char *Msg =
2130 "The input profile significantly mismatches current source code. "
2131 "Please recollect profile to avoid performance regression.";
2132 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
2133 return true;
2134 }
2135 return false;
2136}
2137
/// For every function in \p M, erase all PseudoProbeInst instructions and, for
/// call instructions whose debug location carries a pseudo-probe
/// discriminator, replace that location with a clone that uses a different
/// discriminator value (0 when none is available).
2138void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) {
2139 for (auto &F : M) {
// Probes are collected first and erased after the walk, so instructions are
// not removed while the blocks are being iterated.
2140 std::vector<Instruction *> InstsToDel;
2141 for (auto &BB : F) {
2142 for (auto &I : BB) {
2143 if (isa<PseudoProbeInst>(&I))
2144 InstsToDel.push_back(&I);
2145 else if (isa<CallBase>(&I))
2146 if (const DILocation *DIL = I.getDebugLoc().get()) {
2147 // Restore dwarf discriminator for call.
2148 unsigned Discriminator = DIL->getDiscriminator();
2149 if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
// NOTE(review): listing line 2151 (the call that computes
// DwarfDiscriminator from Discriminator) is missing from this extract.
2150 std::optional<uint32_t> DwarfDiscriminator =
2152 Discriminator);
2153 I.setDebugLoc(
2154 DIL->cloneWithDiscriminator(DwarfDiscriminator.value_or(0)));
2155 }
2156 }
2157 }
2158 }
2159 for (auto *I : InstsToDel)
2160 I->eraseFromParent();
2161 }
2162}
2163
/// Module-level driver: install the profile summary if the module has none,
/// populate SymbolMap, run stale profile matching, process every function
/// returned by buildFunctionOrder through runOnFunction, then perform the
/// post-processing steps at the end of the body.
///
/// \returns the OR of all runOnFunction results, or false when the staleness
/// check at the top rejects the profile.
2164bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager &AM,
2165 ProfileSummaryInfo *_PSI) {
2166 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2167
2168 PSI = _PSI;
2169 if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
// NOTE(review): listing line 2171 (the remaining argument(s) of
// setProfileSummary) is missing from this extract.
2170 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2172 PSI->refresh();
2173 }
2174
// NOTE(review): listing line 2175 is missing; it presumably opens the `if`
// whose condition continues on the next line.
2176 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2177 return false;
2178
2179 auto Remapper = Reader->getRemapper();
2180 // Populate the symbol map.
2181 for (const auto &N_F : M.getValueSymbolTable()) {
2182 StringRef OrigName = N_F.getKey();
2183 Function *F = dyn_cast<Function>(N_F.getValue());
2184 if (F == nullptr || OrigName.empty())
2185 continue;
2186 SymbolMap[FunctionId(OrigName)] = F;
2187 StringRef NewName = FunctionSamples::getCanonicalFnName(*F);
2188 if (OrigName != NewName && !NewName.empty()) {
2189 auto r = SymbolMap.emplace(FunctionId(NewName), F);
2190 // Failing to insert means there is already an entry in SymbolMap,
2191 // thus there are multiple functions that are mapped to the same
2192 // stripped name. In this case of name conflicting, set the value
2193 // to nullptr to avoid confusion.
2194 if (!r.second)
2195 r.first->second = nullptr;
// From here on the canonical name is the one used for the remapper lookup.
2196 OrigName = NewName;
2197 }
2198 // Insert the remapped names into SymbolMap.
2199 if (Remapper) {
2200 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2201 if (*MapName != OrigName && !MapName->empty())
2202 SymbolMap.emplace(FunctionId(*MapName), F);
2203 }
2204 }
2205 }
2206
2207 // Stale profile matching.
// NOTE(review): listing lines 2208-2209 are missing; they presumably hold the
// condition guarding the two calls below.
2210 MatchingManager->runOnModule();
2211 MatchingManager->clearMatchingData();
2212 }
2213 assert(SymbolMap.count(FunctionId()) == 0 &&
2214 "No empty StringRef should be added in SymbolMap");
2215 assert((SalvageUnusedProfile || FuncNameToProfNameMap.empty()) &&
2216 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2217 "not enabled");
2218
2219 bool retval = false;
// Per-function state is reset before each function is processed.
2220 for (auto *F : buildFunctionOrder(M, CG)) {
2221 assert(!F->isDeclaration());
2222 clearFunctionData();
2223 retval |= runOnFunction(*F, AM);
2224 }
2225
2226 // Account for cold calls not inlined....
// NOTE(review): listing line 2227 is missing; the loop below is presumably
// conditional -- confirm against upstream.
2228 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2229 notInlinedCallInfo)
2230 updateProfileCallee(pair.first, pair.second.entryCount);
2231
// NOTE(review): listing lines 2232-2233 are missing; they presumably hold the
// condition under which probes and the probe descriptor metadata are removed.
2234 removePseudoProbeInstsDiscriminator(M);
2235 if (auto *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName))
2236 M.eraseNamedMetadata(FuncInfo);
2237 }
2238
2239 return retval;
2240}
2241
/// Process a single function: choose its initial entry count, look up its
/// samples and, when non-empty samples are found, hand over to
/// emitAnnotations.
///
/// \returns the result of emitAnnotations, or false when no (or empty) samples
/// are found for \p F.
///
/// NOTE(review): listing line 2243 (the rest of the parameter list; the body
/// uses an analysis manager named AM) is missing from this extract.
2242bool SampleProfileLoader::runOnFunction(Function &F,
2244 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2245 DILocation2SampleMap.clear();
2246 // By default the entry count is initialized to -1, which will be treated
2247 // conservatively by getEntryCount as the same as unknown (None). This is
2248 // to avoid newly added code to be treated as cold. If we have samples
2249 // this will be overwritten in emitAnnotations.
2250 uint64_t initialEntryCount = -1;
2251
2252 ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2253 if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2254 // initialize all the function entry counts to 0. It means all the
2255 // functions without profile will be regarded as cold.
2256 initialEntryCount = 0;
2257 // profile-sample-accurate is a user assertion which has a higher precedence
2258 // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2259 ProfAccForSymsInList = false;
2260 }
2261 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2262
2263 // PSL -- profile symbol list include all the symbols in sampled binary.
2264 // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2265 // old functions without samples being cold, without having to worry
2266 // about new and hot functions being mistakenly treated as cold.
2267 if (ProfAccForSymsInList) {
2268 // Initialize the entry count to 0 for functions in the list.
2269 if (PSL->contains(F.getName()))
2270 initialEntryCount = 0;
2271
2272 // Function in the symbol list but without sample will be regarded as
2273 // cold. To minimize the potential negative performance impact it could
2274 // have, we want to be a little conservative here saying if a function
2275 // shows up in the profile, no matter as outline function, inline instance
2276 // or call targets, treat the function as not being cold. This will handle
2277 // the cases such as most callsites of a function are inlined in sampled
2278 // binary but not inlined in current build (because of source code drift,
2279 // imprecise debug information, or the callsites are all cold individually
2280 // but not cold accumulatively...), so the outline function showing up as
2281 // cold in sampled binary will actually not be cold after current build.
2282 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
// NOTE(review): listing line 2283 (the start of this `if` condition) is
// missing. The visible part checks GUIDsInProfile for the function's GUID, or
// NamesInProfile for its canonical name when UseMD5 is off.
2284 GUIDsInProfile.count(
2285 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2286 (!FunctionSamples::UseMD5 && NamesInProfile.count(CanonName)))
2287 initialEntryCount = -1;
2288 }
2289
2290 // Initialize entry count when the function has no existing entry
2291 // count value.
2292 if (!F.getEntryCount())
2293 F.setEntryCount(initialEntryCount);
2294 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
2295 .getManager();
2296 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2297
// NOTE(review): listing line 2298 is missing; it presumably holds the `if`
// that selects the ContextTracker lookup over the Reader lookup below.
2299 Samples = ContextTracker->getBaseSamplesFor(F);
2300 else {
2301 Samples = Reader->getSamplesFor(F);
2302 // Try search in previously inlined functions that were split or duplicated
2303 // into base.
2304 if (!Samples) {
2305 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
2306 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2307 if (It != OutlineFunctionSamples.end()) {
2308 Samples = &It->second;
2309 } else if (auto Remapper = Reader->getRemapper()) {
// Second chance: retry the lookup under the remapped profile name.
2310 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2311 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2312 if (It != OutlineFunctionSamples.end())
2313 Samples = &It->second;
2314 }
2315 }
2316 }
2317 }
2318
2319 if (Samples && !Samples->empty())
2320 return emitAnnotations(F);
2321 return false;
2322}
// NOTE(review): listing line 2323, which names this definition, is missing
// from the extract. What remains is a constructor's parameter list and member
// initializer list -- presumably SampleProfileLoaderPass's; confirm against
// upstream. It only stores its arguments in the corresponding members; the
// body is empty.
2324 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2325 IntrusiveRefCntPtr<vfs::FileSystem> FS, bool DisableSampleProfileInlining,
2326 bool UseFlattenedProfile)
2327 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2328 LTOPhase(LTOPhase), FS(std::move(FS)),
2329 DisableSampleProfileInlining(DisableSampleProfileInlining),
2330 UseFlattenedProfile(UseFlattenedProfile) {}
2331
2336
// NOTE(review): listing lines 2332-2335 (the function header and the
// statements that introduce FAM, and presumably CG) are missing from the
// extract; this is presumably the body of the pass's run() entry point --
// confirm against upstream.
//
// The visible body builds per-function analysis getters, constructs a
// SampleProfileLoader, and returns PreservedAnalyses::all() when either
// initialization fails or runOnModule returns false, and
// PreservedAnalyses::none() otherwise.
2337 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2338 return FAM.getResult<AssumptionAnalysis>(F);
2339 };
2340 auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2341 return FAM.getResult<TargetIRAnalysis>(F);
2342 };
2343 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2344 return FAM.getResult<TargetLibraryAnalysis>(F);
2345 };
2346
// NOTE(review): listing lines 2348-2349 (the statement executed when FS is
// null) are missing from this extract.
2347 if (!FS)
2350
// File names stored on the pass take precedence; the command-line options are
// used only when those are empty.
2351 SampleProfileLoader SampleLoader(
2352 ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2353 ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2354 : ProfileRemappingFileName,
2355 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2356 DisableSampleProfileInlining, UseFlattenedProfile);
2357 if (!SampleLoader.doInitialization(M, &FAM))
2358 return PreservedAnalyses::all();
2359
// NOTE(review): listing line 2360 is missing; it presumably obtains the PSI
// value passed to runOnModule below.
2361 if (!SampleLoader.runOnModule(M, AM, PSI))
2362 return PreservedAnalyses::all();
2363
2364 return PreservedAnalyses::none();
2365}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
#define DEBUG_TYPE
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static LVReader * CurrentReader
Definition LVReader.cpp:179
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static constexpr StringLiteral Filename
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
#define CSINLINE_DEBUG
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
Value * LHS
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
A debug info location.
Definition DebugLoc.h:126
LLVM_ABI unsigned getLine() const
Definition DebugLoc.cpp:43
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
Represents either an error or a value T.
Definition ErrorOr.h:56
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:346
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:132
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:127
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition InlineCost.h:121
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
ValueT lookup(const KeyT &Key) const
Definition MapVector.h:110
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:210
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:274
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:190
const ParentTy * getParent() const
Definition ilist_node.h:34
Representation of the samples collected for a function.
Definition SampleProf.h:792
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< DenseMap, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Definition SampleProf.h:655
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
DenseMap< FunctionId, uint64_t > CallTargetMap
Definition SampleProf.h:373
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition SampleProf.h:442
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition SampleProf.h:451
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
Definition SampleProf.h:782
LLVM_ABI bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
Definition CFG.h:141
InstructionCost Cost
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, bool TrackInlineHistory=false, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition Metadata.h:59
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< std::string > SampleProfileFile
constexpr const char * PseudoProbeDescMetadataName
Definition PseudoProbe.h:26
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition InlineCost.h:244
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition InlineCost.h:238
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)
Definition PseudoProbe.h:81