LLVM  14.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/Twine.h"
44 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/IR/DiagnosticInfo.h"
56 #include "llvm/IR/Dominators.h"
57 #include "llvm/IR/Function.h"
58 #include "llvm/IR/GlobalValue.h"
59 #include "llvm/IR/InstrTypes.h"
60 #include "llvm/IR/Instruction.h"
61 #include "llvm/IR/Instructions.h"
62 #include "llvm/IR/IntrinsicInst.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/MDBuilder.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/PassManager.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorOr.h"
80 #include "llvm/Transforms/IPO.h"
89 #include <algorithm>
90 #include <cassert>
91 #include <cstdint>
92 #include <functional>
93 #include <limits>
94 #include <map>
95 #include <memory>
96 #include <queue>
97 #include <string>
98 #include <system_error>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 using namespace sampleprof;
104 using namespace llvm::sampleprofutil;
106 #define DEBUG_TYPE "sample-profile"
107 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
108 
// Counters declared through the STATISTIC macro. The string attached to each
// counter states what it counts.
109 STATISTIC(NumCSInlined,
110  "Number of functions inlined with context sensitive profile");
111 STATISTIC(NumCSNotInlined,
112  "Number of functions not inlined with context sensitive profile");
113 STATISTIC(NumMismatchedProfile,
114  "Number of functions with CFG mismatched profile");
115 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
116 STATISTIC(NumDuplicatedInlinesite,
117  "Number of inlined callsites with a partial distribution factor");
118 
// Counters for the three size limits (min, max, growth) that can stop FDO
// inlining.
119 STATISTIC(NumCSInlinedHitMinLimit,
120  "Number of functions with FDO inline stopped due to min size limit");
121 STATISTIC(NumCSInlinedHitMaxLimit,
122  "Number of functions with FDO inline stopped due to max size limit");
123 STATISTIC(
124  NumCSInlinedHitGrowthLimit,
125  "Number of functions with FDO inline stopped due to growth size limit");
126 
127 // Command line option to specify the file to read samples from. This is
128 // mainly used for debugging.
130  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
131  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
132 
133 // The named file contains a set of transformations that may have been applied
134 // to the symbol names between the program from which the sample data was
135 // collected and the current program's symbols.
137  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
138  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
139 
141  "profile-sample-accurate", cl::Hidden, cl::init(false),
142  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
143  "callsite and function as having 0 samples. Otherwise, treat "
144  "un-sampled callsites and functions conservatively as unknown. "));
145 
147  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
148  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
149  "branches and calls as having 0 samples. Otherwise, treat "
150  "them conservatively as unknown. "));
151 
153  "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
154  cl::init(true),
155  cl::desc("For symbols in profile symbol list, regard their profiles to "
156  "be accurate. It may be overriden by profile-sample-accurate. "));
157 
159  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
160  cl::desc("Merge past inlinee's profile to outline version if sample "
161  "profile loader decided not to inline a call site. It will "
162  "only be enabled when top-down order of profile loading is "
163  "enabled. "));
164 
166  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
167  cl::desc("Do profile annotation and inlining for functions in top-down "
168  "order of call graph during sample profile loading. It only "
169  "works for new pass manager. "));
170 
// Hidden boolean option, on by default. Per its description it selects a
// top-down processing order defined by the profiled call graph, and only
// applies when -sample-profile-top-down-load is on.
171 static cl::opt<bool>
172  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
173  cl::desc("Process functions in a top-down order "
174  "defined by the profiled call graph when "
175  "-sample-profile-top-down-load is on."));
176 
178  "sample-profile-inline-size", cl::Hidden, cl::init(false),
179  cl::desc("Inline cold call sites in profile loader if it's beneficial "
180  "for code size."));
181 
183  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
184  cl::desc("The size growth ratio limit for proirity-based sample profile "
185  "loader inlining."));
186 
188  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
189  cl::desc("The lower bound of size growth limit for "
190  "proirity-based sample profile loader inlining."));
191 
193  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
194  cl::desc("The upper bound of size growth limit for "
195  "proirity-based sample profile loader inlining."));
196 
198  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
199  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
200  "inlining."));
201 
203  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
204  cl::desc("Threshold for inlining cold callsites"));
205 
207  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
208  cl::desc(
209  "Relative hotness percentage threshold for indirect "
210  "call promotion in proirity-based sample profile loader inlining."));
211 
213  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
214  cl::desc(
215  "Skip relative hotness check for ICP up to given number of targets."));
216 
218  "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
219  cl::init(false),
220  cl::desc("Use call site prioritized inlining for sample profile loader."
221  "Currently only CSSPGO is supported."));
222 
224  "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
225  cl::init(false),
226  cl::desc("Use the preinliner decisions stored in profile context."));
227 
229  "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
230  cl::init(false),
231  cl::desc("Allow sample loader inliner to inline recursive calls."));
232 
234  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
235  cl::desc(
236  "Optimization remarks file containing inline remarks to be replayed "
237  "by inlining from sample profile loader."),
238  cl::Hidden);
239 
241  "sample-profile-inline-replay-scope",
244  "Replay on functions that have remarks associated "
245  "with them (default)"),
247  "Replay on the entire module")),
248  cl::desc("Whether inline replay should be applied to the entire "
249  "Module or just the Functions (default) that are present as "
250  "callers in remarks during sample profile inlining."),
251  cl::Hidden);
252 
254  "sample-profile-inline-replay-fallback",
256  cl::values(
257  clEnumValN(
259  "All decisions not in replay send to original advisor (default)"),
261  "AlwaysInline", "All decisions not in replay are inlined"),
263  "All decisions not in replay are not inlined")),
264  cl::desc("How sample profile inline replay treats sites that don't come "
265  "from the replay. Original: defers to original advisor, "
266  "AlwaysInline: inline all sites not in replay, NeverInline: "
267  "inline no sites not in replay"),
268  cl::Hidden);
269 
271  "sample-profile-inline-replay-format",
273  cl::values(
274  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
276  "<Line Number>:<Column Number>"),
278  "LineDiscriminator", "<Line Number>.<Discriminator>"),
280  "LineColumnDiscriminator",
281  "<Line Number>:<Column Number>.<Discriminator> (default)")),
282  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
283 
// Hidden option (default 3) bounding the number of promotions for a single
// indirect call site. It also sizes the value-profile buffers used by
// doesHistoryAllowICP and updateIDTMetaData.
284 static cl::opt<unsigned>
285  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
287  cl::desc("Max number of promotions for a single indirect "
288  "call callsite in sample profile loader"));
289 
291  "overwrite-existing-weights", cl::Hidden, cl::init(false),
292  cl::desc("Ignore existing branch weights on IR and always overwrite."));
293 
294 namespace {
295 
296 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
297 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
298 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
299 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
300 using BlockEdgeMap =
302 
// Scoped helper for profiles that use MD5 names. The constructor fills the
// caller-provided GUID-to-name map from the functions of the module and
// attaches that map to every FunctionSamples reachable from the reader; the
// destructor clears the map and detaches it again. Both do nothing when the
// reader does not use MD5.
303 class GUIDToFuncNameMapper {
304 public:
305  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
306  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
307  : CurrentReader(Reader), CurrentModule(M),
308  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
309  if (!CurrentReader.useMD5())
310  return;
311 
312  for (const auto &F : CurrentModule) {
313  StringRef OrigName = F.getName();
314  CurrentGUIDToFuncNameMap.insert(
315  {Function::getGUID(OrigName), OrigName});
316 
317  // Local to global var promotion used by optimization like thinlto
318  // will rename the var and add suffix like ".llvm.xxx" to the
319  // original local name. In sample profile, the suffixes of function
320  // names are all stripped. Since it is possible that the mapper is
321  // built in post-thin-link phase and var promotion has been done,
322  // we need to add the substring of function name without the suffix
323  // into the GUIDToFuncNameMap.
// NOTE(review): the statement defining CanonName is not visible in this
// listing; confirm against the original source.
325  if (CanonName != OrigName)
326  CurrentGUIDToFuncNameMap.insert(
327  {Function::getGUID(CanonName), CanonName});
328  }
329 
330  // Update GUIDToFuncNameMap for each function including inlinees.
331  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
332  }
333 
334  ~GUIDToFuncNameMapper() {
335  if (!CurrentReader.useMD5())
336  return;
337 
338  CurrentGUIDToFuncNameMap.clear();
339 
340  // Reset GUIDToFuncNameMap for each function as they're no
341  // longer valid at this point.
342  SetGUIDToFuncNameMapForAll(nullptr);
343  }
344 
345 private:
// Assigns \p Map to the GUIDToFuncNameMap member of every FunctionSamples,
// starting from the reader's profiles and walking all nested callsite
// samples with a work queue.
346  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
347  std::queue<FunctionSamples *> FSToUpdate;
348  for (auto &IFS : CurrentReader.getProfiles()) {
349  FSToUpdate.push(&IFS.second);
350  }
351 
352  while (!FSToUpdate.empty()) {
353  FunctionSamples *FS = FSToUpdate.front();
354  FSToUpdate.pop();
355  FS->GUIDToFuncNameMap = Map;
356  for (const auto &ICS : FS->getCallsiteSamples()) {
357  const FunctionSamplesMap &FSMap = ICS.second;
358  for (auto &IFS : FSMap) {
// FSMap is a const reference, so constness is cast away to queue a mutable
// pointer. Note that this reference shadows the FS pointer declared above.
359  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
360  FSToUpdate.push(&FS);
361  }
362  }
363  }
364  }
365 
366  SampleProfileReader &CurrentReader;
367  Module &CurrentModule;
368  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
369 };
370 
371 // Inline candidate used by iterative callsite prioritized inliner
372 struct InlineCandidate {
373  CallBase *CallInstr;
374  const FunctionSamples *CalleeSamples;
375  // Prorated callsite count, which will be used to guide inlining. For example,
376  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
377  // copies will get their own distribution factors and their prorated counts
378  // will be used to decide if they should be inlined independently.
379  uint64_t CallsiteCount;
380  // Call site distribution factor to prorate the profile samples for a
381  // duplicated callsite. Default value is 1.0.
382  float CallsiteDistribution;
383 };
384 
385 // Inline candidate comparer using call site weight
386 struct CandidateComparer {
387  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
388  if (LHS.CallsiteCount != RHS.CallsiteCount)
389  return LHS.CallsiteCount < RHS.CallsiteCount;
390 
391  const FunctionSamples *LCS = LHS.CalleeSamples;
392  const FunctionSamples *RCS = RHS.CalleeSamples;
393  assert(LCS && RCS && "Expect non-null FunctionSamples");
394 
395  // Tie breaker using number of samples try to favor smaller functions first
396  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
397  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
398 
399  // Tie breaker using GUID so we have stable/deterministic inlining order
400  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
401  }
402 };
403 
404 using CandidateQueue =
406  CandidateComparer>;
407 
408 /// Sample profile pass.
409 ///
410 /// This pass reads profile data from the file specified by
411 /// -sample-profile-file and annotates every affected function with the
412 /// profile information found in that file.
413 class SampleProfileLoader final
414  : public SampleProfileLoaderBaseImpl<BasicBlock> {
415 public:
// The three callbacks are moved into the GetAC, GetTTI and GetTLI members.
// NOTE(review): the start of the initializer list is not visible in this
// listing, so how Name and RemapName are consumed cannot be confirmed here.
416  SampleProfileLoader(
417  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
418  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
419  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
420  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
422  GetAC(std::move(GetAssumptionCache)),
423  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
424  LTOPhase(LTOPhase) {}
425 
426  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
427  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
428  ProfileSummaryInfo *_PSI, CallGraph *CG);
429 
430 protected:
432  bool emitAnnotations(Function &F);
433  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
434  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
435  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
436  const FunctionSamples *
437  findFunctionSamples(const Instruction &I) const override;
438  std::vector<const FunctionSamples *>
439  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
440  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
441  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
444  // Attempt to promote indirect call and also inline the promoted call
445  bool tryPromoteAndInlineCandidate(
446  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
447  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
448 
449  bool inlineHotFunctions(Function &F,
450  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
451  Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
452  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
453  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
454  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
455  bool
456  tryInlineCandidate(InlineCandidate &Candidate,
457  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
458  bool
459  inlineHotFunctionsWithPriority(Function &F,
460  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
461  // Inline cold/small functions in addition to hot ones
462  bool shouldInlineColdCallee(CallBase &CallInst);
463  void emitOptimizationRemarksForInlineCandidates(
464  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
465  bool Hot);
466  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
467  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
468  void generateMDProfMetadata(Function &F);
469 
470  /// Map from function name to Function *. Used to find the function from
471  /// the function name. If the function name contains suffix, additional
472  /// entry is added to map from the stripped name to the function if there
473  /// is one-to-one mapping.
475 
478  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
479 
480  /// Profile tracker for different context.
481  std::unique_ptr<SampleContextTracker> ContextTracker;
482 
483  /// Flag indicating whether input profile is context-sensitive
484  bool ProfileIsCS = false;
485 
486  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
487  ///
488  /// We need to know the LTO phase because for example in ThinLTOPrelink
489  /// phase, in annotation, we should not promote indirect calls. Instead,
490  /// we will mark GUIDs that needs to be annotated to the function.
491  ThinOrFullLTOPhase LTOPhase;
492 
493  /// Profile Symbol list tells whether a function name appears in the binary
494  /// used to generate the current profile.
495  std::unique_ptr<ProfileSymbolList> PSL;
496 
497  /// Total number of samples collected in this profile.
498  ///
499  /// This is the sum of all the samples collected in all the functions executed
500  /// at runtime.
501  uint64_t TotalCollectedSamples = 0;
502 
503  // Information recorded when we declined to inline a call site
504  // because we have determined it is too cold is accumulated for
505  // each callee function. Initially this is just the entry count.
506  struct NotInlinedProfileInfo {
507  uint64_t entryCount;
508  };
510 
511  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
512  // all the function symbols defined or declared in current module.
513  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
514 
515  // All the Names used in FunctionSamples including outline function
516  // names, inline instance names and call target names.
517  StringSet<> NamesInProfile;
518 
519  // For symbol in profile symbol list, whether to regard their profiles
520  // to be accurate. It is mainly decided by existence of profile symbol
521  // list and -profile-accurate-for-symsinlist flag, but it can be
522  // overridden by -profile-sample-accurate or profile-sample-accurate
523  // attribute.
// Note: this member has no in-class initializer.
524  bool ProfAccForSymsInList;
525 
526  // External inline advisor used to replay inline decision from remarks.
527  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
528 
529  // A pseudo probe helper to correlate the imported sample counts.
530  std::unique_ptr<PseudoProbeManager> ProbeManager;
531 };
532 
// ModulePass wrapper that owns a SampleProfileLoader and forwards
// doInitialization and dump to it.
533 class SampleProfileLoaderLegacyPass : public ModulePass {
534 public:
535  // Class identification, replacement for typeinfo
536  static char ID;
537 
// The three lambdas handed to SampleLoader capture by reference and
// dereference ACT, TTIWP and TLIWP only when they are called. Those pointers
// are initialized to nullptr below, so they must be assigned before any
// callback runs.
// NOTE(review): the constructor parameters and body are not visible in this
// listing; where the pointers are assigned cannot be confirmed here.
538  SampleProfileLoaderLegacyPass(
541  : ModulePass(ID), SampleLoader(
542  Name, SampleProfileRemappingFile, LTOPhase,
543  [&](Function &F) -> AssumptionCache & {
544  return ACT->getAssumptionCache(F);
545  },
546  [&](Function &F) -> TargetTransformInfo & {
547  return TTIWP->getTTI(F);
548  },
549  [&](Function &F) -> TargetLibraryInfo & {
550  return TLIWP->getTLI(F);
551  }) {
554  }
555 
556  void dump() { SampleLoader.dump(); }
557 
// Forwards to the loader; the loader's FunctionAnalysisManager parameter
// takes its default value (nullptr).
558  bool doInitialization(Module &M) override {
559  return SampleLoader.doInitialization(M);
560  }
561 
562  StringRef getPassName() const override { return "Sample profile pass"; }
563  bool runOnModule(Module &M) override;
564 
565  void getAnalysisUsage(AnalysisUsage &AU) const override {
570  }
571 
572 private:
573  SampleProfileLoader SampleLoader;
// Analysis handles read by the callbacks passed to SampleLoader.
574  AssumptionCacheTracker *ACT = nullptr;
575  TargetTransformInfoWrapperPass *TTIWP = nullptr;
576  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
577 };
578 
579 } // end anonymous namespace
580 
// Returns the sample weight to use for \p Inst.
//
// Yields an empty error_code when the instruction has no debug location or is
// a branch, intrinsic or PHI node; yields 0 for a direct call that has callee
// samples in a non-CS profile; otherwise defers to getInstWeightImpl.
581 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// NOTE(review): the condition guarding this early return is not visible in
// this listing; confirm against the original source.
583  return getProbeWeight(Inst);
584 
585  const DebugLoc &DLoc = Inst.getDebugLoc();
586  if (!DLoc)
587  return std::error_code();
588 
589  // Ignore all intrinsics, phinodes and branch instructions.
590  // Branch and phinodes instruction usually contains debug info from sources
591  // outside of the residing basic block, thus we ignore them during annotation.
592  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
593  return std::error_code();
594 
595  // For non-CS profile, if a direct call/invoke instruction is inlined in
596  // profile (findCalleeFunctionSamples returns non-empty result), but not
597  // inlined here, it means that the inlined callsite has no sample, thus the
598  // call instruction should have 0 count.
599  // For CS profile, the callsite count of previously inlined callees is
600  // populated with the entry count of the callees.
601  if (!ProfileIsCS)
602  if (const auto *CB = dyn_cast<CallBase>(&Inst))
603  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
604  return 0;
605 
606  return getInstWeightImpl(Inst);
607 }
608 
609 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
610 // of non-probe instruction. So if all instructions of the BB give error_code,
611 // tell the inference algorithm to infer the BB weight.
//
// On success the returned weight is the sample count found for the probe
// multiplied by the probe's Factor.
612 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// NOTE(review): the opening of the assertion that ends on the next line is not
// visible in this listing.
614  "Profile is not pseudo probe based");
615  Optional<PseudoProbe> Probe = extractProbe(Inst);
616  // Ignore the non-probe instruction. If none of the instruction in the BB is
617  // probe, we choose to infer the BB's weight.
618  if (!Probe)
619  return std::error_code();
620 
621  const FunctionSamples *FS = findFunctionSamples(Inst);
622  // If none of the instruction has FunctionSample, we choose to return zero
623  // value sample to indicate the BB is cold. This could happen when the
624  // instruction is from inlinee and no profile data is found.
625  // FIXME: This should not be affected by the source drift issue as 1) if the
626  // newly added function is top-level inliner, it won't match the CFG checksum
627  // in the function profile or 2) if it's the inlinee, the inlinee should have
628  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
629  // we can improve it by adding a switch for profile-sample-block-accurate for
630  // block level counts in the future.
631  if (!FS)
632  return 0;
633 
634  // For non-CS profile, If a direct call/invoke instruction is inlined in
635  // profile (findCalleeFunctionSamples returns non-empty result), but not
636  // inlined here, it means that the inlined callsite has no sample, thus the
637  // call instruction should have 0 count.
638  // For CS profile, the callsite count of previously inlined callees is
639  // populated with the entry count of the callees.
640  if (!ProfileIsCS)
641  if (const auto *CB = dyn_cast<CallBase>(&Inst))
642  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
643  return 0;
644 
// Samples are looked up by probe id; the second argument is always 0 here.
645  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
646  if (R) {
// Scale the raw count by the probe's factor before recording and returning it.
647  uint64_t Samples = R.get() * Probe->Factor;
648  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
// The remark is emitted only when markSamplesUsed returned true.
649  if (FirstMark) {
650  ORE->emit([&]() {
651  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
652  Remark << "Applied " << ore::NV("NumSamples", Samples);
653  Remark << " samples from profile (ProbeId=";
654  Remark << ore::NV("ProbeId", Probe->Id);
655  Remark << ", Factor=";
656  Remark << ore::NV("Factor", Probe->Factor);
657  Remark << ", OriginalSamples=";
658  Remark << ore::NV("OriginalSamples", R.get());
659  Remark << ")";
660  return Remark;
661  });
662  }
// NOTE(review): the debug text below ends with ")" but never prints a matching
// "(".
663  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
664  << " - weight: " << R.get() << " - factor: "
665  << format("%0.2f", Probe->Factor) << ")\n");
666  return Samples;
667  }
// No samples were found for the probe: propagate the lookup error unchanged.
668  return R;
669 }
670 
671 /// Get the FunctionSamples for a call instruction.
672 ///
673 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
674 /// instance in which that call instruction is calling to. It contains
675 /// all samples that resides in the inlined instance. We first find the
676 /// inlined instance in which the call instruction is from, then we
677 /// traverse its children to find the callsite with the matching
678 /// location.
679 ///
680 /// \param Inst Call/Invoke instruction to query.
681 ///
682 /// \returns The FunctionSamples pointer to the inlined instance.
683 const FunctionSamples *
684 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
685  const DILocation *DIL = Inst.getDebugLoc();
686  if (!DIL) {
687  return nullptr;
688  }
689 
690  StringRef CalleeName;
691  if (Function *Callee = Inst.getCalledFunction())
692  CalleeName = Callee->getName();
693 
694  if (ProfileIsCS)
695  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
696 
697  const FunctionSamples *FS = findFunctionSamples(Inst);
698  if (FS == nullptr)
699  return nullptr;
700 
701  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
702  CalleeName, Reader->getRemapper());
703 }
704 
705 /// Returns a vector of FunctionSamples that are the indirect call targets
706 /// of \p Inst. The vector is sorted by entry samples in descending order,
/// with ties broken by ascending GUID of the function name. Stores
707 /// the total call count of the indirect call in \p Sum.
///
/// \p Sum is left untouched on the early exits taken when \p Inst has no
/// debug location, when a CS profile has no callee samples, or when no
/// FunctionSamples is found for \p Inst.
708 std::vector<const FunctionSamples *>
709 SampleProfileLoader::findIndirectCallFunctionSamples(
710  const Instruction &Inst, uint64_t &Sum) const {
711  const DILocation *DIL = Inst.getDebugLoc();
712  std::vector<const FunctionSamples *> R;
713 
714  if (!DIL) {
715  return R;
716  }
717 
// Sort predicate: higher entry samples first, then smaller GUID first. Its
// parameter R shadows the result vector R inside the lambda body.
718  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
719  assert(L && R && "Expect non-null FunctionSamples");
720  if (L->getEntrySamples() != R->getEntrySamples())
721  return L->getEntrySamples() > R->getEntrySamples();
722  return FunctionSamples::getGUID(L->getName()) <
723  FunctionSamples::getGUID(R->getName());
724  };
725 
726  if (ProfileIsCS) {
727  auto CalleeSamples =
728  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
729  if (CalleeSamples.empty())
730  return R;
731 
732  // For CSSPGO, we only use target context profile's entry count
733  // as that already includes both inlined callee and non-inlined ones.
734  Sum = 0;
735  for (const auto *const FS : CalleeSamples) {
736  Sum += FS->getEntrySamples();
737  R.push_back(FS);
738  }
739  llvm::sort(R, FSCompare);
740  return R;
741  }
742 
743  const FunctionSamples *FS = findFunctionSamples(Inst);
744  if (FS == nullptr)
745  return R;
746 
// NOTE(review): the statement defining CallSite is not visible in this
// listing; confirm against the original source.
748  auto T = FS->findCallTargetMapAt(CallSite);
749  Sum = 0;
// Sum first accumulates the counts in the call target map, then the entry
// samples of every FunctionSamples recorded at this call site.
750  if (T)
751  for (const auto &T_C : T.get())
752  Sum += T_C.second;
753  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
754  if (M->empty())
755  return R;
756  for (const auto &NameFS : *M) {
757  Sum += NameFS.second.getEntrySamples();
758  R.push_back(&NameFS.second);
759  }
760  llvm::sort(R, FSCompare);
761  }
762  return R;
763 }
764 
// Returns the FunctionSamples that applies to \p Inst.
//
// Lookups are memoized per DILocation in DILocation2SampleMap: the profile is
// queried only the first time a location is seen, and a nullptr result is
// cached as well.
765 const FunctionSamples *
766 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
// NOTE(review): the condition that opens the braced block below is not visible
// in this listing; confirm against the original source.
768  Optional<PseudoProbe> Probe = extractProbe(Inst);
769  if (!Probe)
770  return nullptr;
771  }
772 
773  const DILocation *DIL = Inst.getDebugLoc();
// Without a debug location the Samples member is returned as-is.
774  if (!DIL)
775  return Samples;
776 
// try_emplace inserts a nullptr placeholder; it.second is true only when the
// location was not cached yet, in which case the placeholder is filled in.
777  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
778  if (it.second) {
779  if (ProfileIsCS)
780  it.first->second = ContextTracker->getContextSamplesFor(DIL);
781  else
782  it.first->second =
783  Samples->findFunctionSamples(DIL, Reader->getRemapper());
784  }
785  return it.first->second;
786 }
787 
788 /// Check whether the indirect call promotion history of \p Inst allows
789 /// the promotion for \p Candidate.
790 /// If the profile count for the promotion candidate \p Candidate is
791 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
792 /// for \p Inst. If we already have at least MaxNumPromotions
793 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
794 /// cannot promote for \p Inst anymore.
795 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
796  uint32_t NumVals = 0;
797  uint64_t TotalCount = 0;
798  std::unique_ptr<InstrProfValueData[]> ValueData =
799  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
800  bool Valid =
801  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
802  ValueData.get(), NumVals, TotalCount, true);
803  // No valid value profile so no promoted targets have been recorded
804  // before. Ok to do ICP.
805  if (!Valid)
806  return true;
807 
808  unsigned NumPromoted = 0;
809  for (uint32_t I = 0; I < NumVals; I++) {
810  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
811  continue;
812 
813  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
814  // metadata, it means the candidate has been promoted for this
815  // indirect call.
816  if (ValueData[I].Value == Function::getGUID(Candidate))
817  return false;
818  NumPromoted++;
819  // If already have MaxNumPromotions promotion, don't do it anymore.
820  if (NumPromoted == MaxNumPromotions)
821  return false;
822  }
823  return true;
824 }
825 
826 /// Update indirect call target profile metadata for \p Inst.
827 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
828 /// If it is 0, it means updateIDTMetaData is used to mark a
829 /// certain target to be promoted already. If it is not zero,
830 /// we expect to use it to update the total count in the value profile.
///
/// The merged targets are sorted by descending count (ties broken by
/// descending value), and at most MaxNumPromotions of them are annotated.
831 static void
// NOTE(review): the line carrying the function name and first parameter is not
// visible in this listing.
833  const SmallVectorImpl<InstrProfValueData> &CallTargets,
834  uint64_t Sum) {
835  uint32_t NumVals = 0;
836  // OldSum is the existing total count in the value profile data.
837  uint64_t OldSum = 0;
838  std::unique_ptr<InstrProfValueData[]> ValueData =
839  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
840  bool Valid =
841  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
842  ValueData.get(), NumVals, OldSum, true);
843 
// Target value -> count for the merged view of existing and new entries.
844  DenseMap<uint64_t, uint64_t> ValueCountMap;
845  if (Sum == 0) {
846  assert((CallTargets.size() == 1 &&
847  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
848  "If sum is 0, assume only one element in CallTargets "
849  "with count being NOMORE_ICP_MAGICNUM");
850  // Initialize ValueCountMap with existing value profile data.
851  if (Valid) {
852  for (uint32_t I = 0; I < NumVals; I++)
853  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
854  }
855  auto Pair =
856  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
857  // If the target already exists in value profile, decrease the total
858  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
859  if (!Pair.second) {
860  OldSum -= Pair.first->second;
861  Pair.first->second = NOMORE_ICP_MAGICNUM;
862  }
// In marking mode the total written back is the adjusted existing total.
863  Sum = OldSum;
864  } else {
865  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
866  // counts in the value profile.
867  if (Valid) {
868  for (uint32_t I = 0; I < NumVals; I++) {
869  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
870  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
871  }
872  }
873 
874  for (const auto &Data : CallTargets) {
875  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
// A successful insertion means the target was not previously marked as
// promoted, so its count is kept and Sum is left alone.
876  if (Pair.second)
877  continue;
878  // The target represented by Data.Value has already been promoted.
879  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
880  // Sum by Data.Count.
881  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
882  Sum -= Data.Count;
883  }
884  }
885 
// Flatten the map into a vector so it can be sorted and annotated.
886  SmallVector<InstrProfValueData, 8> NewCallTargets;
887  for (const auto &ValueCount : ValueCountMap) {
888  NewCallTargets.emplace_back(
889  InstrProfValueData{ValueCount.first, ValueCount.second});
890  }
891 
// Sort by descending count, then by descending value to break ties.
892  llvm::sort(NewCallTargets,
893  [](const InstrProfValueData &L, const InstrProfValueData &R) {
894  if (L.Count != R.Count)
895  return L.Count > R.Count;
896  return L.Value > R.Value;
897  });
898 
// Never annotate more than MaxNumPromotions targets.
899  uint32_t MaxMDCount =
900  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
901  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
902  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
903 }
904 
905 /// Attempt to promote indirect call and also inline the promoted call.
906 ///
907 /// \param F Caller function.
908 /// \param Candidate ICP and inline candidate.
909 /// \param SumOrigin Original sum of target counts for indirect call before
910 /// promoting given candidate.
911 /// \param Sum Prorated sum of remaining target counts for indirect call
912 /// after promoting given candidate.
913 /// \param InlinedCallSite Output vector for new call sites exposed after
914 /// inlining.
915 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
916  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
917  SmallVector<CallBase *, 8> *InlinedCallSite) {
918  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
919  auto R = SymbolMap.find(CalleeFunctionName);
920  if (R == SymbolMap.end() || !R->getValue())
921  return false;
922 
923  auto &CI = *Candidate.CallInstr;
924  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
925  return false;
926 
927  const char *Reason = "Callee function not available";
928  // R->getValue() != &F is to prevent promoting a recursive call.
929  // If it is a recursive call, we do not inline it as it could bloat
930  // the code exponentially. There is way to better handle this, e.g.
931  // clone the caller first, and inline the cloned caller if it is
932  // recursive. As llvm does not inline recursive calls, we will
933  // simply ignore it instead of handling it explicitly.
934  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
935  R->getValue()->hasFnAttribute("use-sample-profile") &&
936  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
937  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
938  // in the value profile metadata so the target won't be promoted again.
939  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
940  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
941  updateIDTMetaData(CI, SortedCallTargets, 0);
942 
943  auto *DI = &pgo::promoteIndirectCall(
944  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
945  if (DI) {
946  Sum -= Candidate.CallsiteCount;
947  // Do not prorate the indirect callsite distribution since the original
948  // distribution will be used to scale down non-promoted profile target
949  // counts later. By doing this we lose track of the real callsite count
950  // for the leftover indirect callsite as a trade off for accurate call
951  // target counts.
952  // TODO: Ideally we would have two separate factors, one for call site
953  // counts and one is used to prorate call target counts.
954  // Do not update the promoted direct callsite distribution at this
955  // point since the original distribution combined with the callee profile
956  // will be used to prorate callsites from the callee if inlined. Once not
957  // inlined, the direct callsite distribution should be prorated so that
958  // the it will reflect the real callsite counts.
959  Candidate.CallInstr = DI;
960  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
961  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
962  if (!Inlined) {
963  // Prorate the direct callsite distribution so that it reflects real
964  // callsite counts.
966  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
967  }
968  return Inlined;
969  }
970  }
971  } else {
972  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
973  << Candidate.CalleeSamples->getFuncName() << " because "
974  << Reason << "\n");
975  }
976  return false;
977 }
978 
979 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
980  if (!ProfileSizeInline)
981  return false;
982 
984  if (Callee == nullptr)
985  return false;
986 
987  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
988  GetAC, GetTLI);
989 
990  if (Cost.isNever())
991  return false;
992 
993  if (Cost.isAlways())
994  return true;
995 
996  return Cost.getCost() <= SampleColdCallSiteThreshold;
997 }
998 
999 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1000  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1001  bool Hot) {
1002  for (auto I : Candidates) {
1003  Function *CalledFunction = I->getCalledFunction();
1004  if (CalledFunction) {
1005  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
1006  I->getDebugLoc(), I->getParent())
1007  << "previous inlining reattempted for "
1008  << (Hot ? "hotness: '" : "size: '")
1009  << ore::NV("Callee", CalledFunction) << "' into '"
1010  << ore::NV("Caller", &F) << "'");
1011  }
1012  }
1013 }
1014 
1015 void SampleProfileLoader::findExternalInlineCandidate(
1016  CallBase *CB, const FunctionSamples *Samples,
1017  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1019 
1020  // If ExternalInlineAdvisor wants to inline an external function
1021  // make sure it's imported
1022  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1023  // Samples may not exist for replayed function, if so
1024  // just add the direct GUID and move on
1025  if (!Samples) {
1026  InlinedGUIDs.insert(
1028  return;
1029  }
1030  // Otherwise, drop the threshold to import everything that we can
1031  Threshold = 0;
1032  }
1033 
1034  assert(Samples && "expect non-null caller profile");
1035 
1036  // For AutoFDO profile, retrieve candidate profiles by walking over
1037  // the nested inlinee profiles.
1038  if (!ProfileIsCS) {
1039  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1040  return;
1041  }
1042 
1044  ContextTracker->getContextFor(Samples->getContext());
1045  std::queue<ContextTrieNode *> CalleeList;
1046  CalleeList.push(Caller);
1047  while (!CalleeList.empty()) {
1048  ContextTrieNode *Node = CalleeList.front();
1049  CalleeList.pop();
1050  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1051  // For CSSPGO profile, retrieve candidate profile by walking over the
1052  // trie built for context profile. Note that also take call targets
1053  // even if callee doesn't have a corresponding context profile.
1054  if (!CalleeSample)
1055  continue;
1056 
1057  // If pre-inliner decision is used, honor that for importing as well.
1058  bool PreInline =
1061  if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
1062  continue;
1063 
1064  StringRef Name = CalleeSample->getFuncName();
1066  // Add to the import list only when it's defined out of module.
1067  if (!Func || Func->isDeclaration())
1068  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1069 
1070  // Import hot CallTargets, which may not be available in IR because full
1071  // profile annotation cannot be done until backend compilation in ThinLTO.
1072  for (const auto &BS : CalleeSample->getBodySamples())
1073  for (const auto &TS : BS.second.getCallTargets())
1074  if (TS.getValue() > Threshold) {
1075  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1076  const Function *Callee = SymbolMap.lookup(CalleeName);
1077  if (!Callee || Callee->isDeclaration())
1078  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1079  }
1080 
1081  // Import hot child context profile associted with callees. Note that this
1082  // may have some overlap with the call target loop above, but doing this
1083  // based child context profile again effectively allow us to use the max of
1084  // entry count and call target count to determine importing.
1085  for (auto &Child : Node->getAllChildContext()) {
1086  ContextTrieNode *CalleeNode = &Child.second;
1087  CalleeList.push(CalleeNode);
1088  }
1089  }
1090 }
1091 
1092 /// Iteratively inline hot callsites of a function.
1093 ///
1094 /// Iteratively traverse all callsites of the function \p F, and find if
1095 /// the corresponding inlined instance exists and is hot in profile. If
1096 /// it is hot enough, inline the callsites and adds new callsites of the
1097 /// callee into the caller. If the call is an indirect call, first promote
1098 /// it to direct call. Each indirect call is limited with a single target.
1099 ///
1100 /// \param F function to perform iterative inlining.
1101 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1102 /// inlined in the profiled binary.
1103 ///
1104 /// \returns True if there is any inline happened.
1105 bool SampleProfileLoader::inlineHotFunctions(
1106  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1107  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1108  // Profile symbol list is ignored when profile-sample-accurate is on.
1109  assert((!ProfAccForSymsInList ||
1111  !F.hasFnAttribute("profile-sample-accurate"))) &&
1112  "ProfAccForSymsInList should be false when profile-sample-accurate "
1113  "is enabled");
1114 
1115  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1116  bool Changed = false;
1117  bool LocalChanged = true;
1118  while (LocalChanged) {
1119  LocalChanged = false;
1121  for (auto &BB : F) {
1122  bool Hot = false;
1123  SmallVector<CallBase *, 10> AllCandidates;
1124  SmallVector<CallBase *, 10> ColdCandidates;
1125  for (auto &I : BB.getInstList()) {
1126  const FunctionSamples *FS = nullptr;
1127  if (auto *CB = dyn_cast<CallBase>(&I)) {
1128  if (!isa<IntrinsicInst>(I)) {
1129  if ((FS = findCalleeFunctionSamples(*CB))) {
1130  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1131  "GUIDToFuncNameMap has to be populated");
1132  AllCandidates.push_back(CB);
1133  if (FS->getEntrySamples() > 0 || ProfileIsCS)
1134  LocalNotInlinedCallSites.try_emplace(CB, FS);
1135  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1136  Hot = true;
1137  else if (shouldInlineColdCallee(*CB))
1138  ColdCandidates.push_back(CB);
1139  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1140  AllCandidates.push_back(CB);
1141  }
1142  }
1143  }
1144  }
1145  if (Hot || ExternalInlineAdvisor) {
1146  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1147  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1148  } else {
1149  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1150  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1151  }
1152  }
1153  for (CallBase *I : CIS) {
1154  Function *CalledFunction = I->getCalledFunction();
1155  InlineCandidate Candidate = {
1156  I,
1157  LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
1158  : nullptr,
1159  0 /* dummy count */, 1.0 /* dummy distribution factor */};
1160  // Do not inline recursive calls.
1161  if (CalledFunction == &F)
1162  continue;
1163  if (I->isIndirectCall()) {
1164  uint64_t Sum;
1165  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1166  uint64_t SumOrigin = Sum;
1167  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1168  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1169  PSI->getOrCompHotCountThreshold());
1170  continue;
1171  }
1172  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1173  continue;
1174 
1175  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1176  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1177  LocalNotInlinedCallSites.erase(I);
1178  LocalChanged = true;
1179  }
1180  }
1181  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1182  !CalledFunction->isDeclaration()) {
1183  if (tryInlineCandidate(Candidate)) {
1184  LocalNotInlinedCallSites.erase(I);
1185  LocalChanged = true;
1186  }
1187  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1188  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1189  InlinedGUIDs, SymbolMap,
1190  PSI->getOrCompHotCountThreshold());
1191  }
1192  }
1193  Changed |= LocalChanged;
1194  }
1195 
1196  // For CS profile, profile for not inlined context will be merged when
1197  // base profile is being trieved
1198  if (ProfileIsCS)
1199  return Changed;
1200 
1201  // Accumulate not inlined callsite information into notInlinedSamples
1202  for (const auto &Pair : LocalNotInlinedCallSites) {
1203  CallBase *I = Pair.getFirst();
1204  Function *Callee = I->getCalledFunction();
1205  if (!Callee || Callee->isDeclaration())
1206  continue;
1207 
1208  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
1209  I->getDebugLoc(), I->getParent())
1210  << "previous inlining not repeated: '"
1211  << ore::NV("Callee", Callee) << "' into '"
1212  << ore::NV("Caller", &F) << "'");
1213 
1214  ++NumCSNotInlined;
1215  const FunctionSamples *FS = Pair.getSecond();
1216  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1217  continue;
1218  }
1219 
1220  if (ProfileMergeInlinee) {
1221  // A function call can be replicated by optimizations like callsite
1222  // splitting or jump threading and the replicates end up sharing the
1223  // sample nested callee profile instead of slicing the original inlinee's
1224  // profile. We want to do merge exactly once by filtering out callee
1225  // profiles with a non-zero head sample count.
1226  if (FS->getHeadSamples() == 0) {
1227  // Use entry samples as head samples during the merge, as inlinees
1228  // don't have head samples.
1229  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1230  FS->getEntrySamples());
1231 
1232  // Note that we have to do the merge right after processing function.
1233  // This allows OutlineFS's profile to be used for annotation during
1234  // top-down processing of functions' annotation.
1235  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1236  OutlineFS->merge(*FS);
1237  }
1238  } else {
1239  auto pair =
1240  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1241  pair.first->second.entryCount += FS->getEntrySamples();
1242  }
1243  }
1244  return Changed;
1245 }
1246 
1247 bool SampleProfileLoader::tryInlineCandidate(
1248  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1249 
1250  CallBase &CB = *Candidate.CallInstr;
1251  Function *CalledFunction = CB.getCalledFunction();
1252  assert(CalledFunction && "Expect a callee with definition");
1253  DebugLoc DLoc = CB.getDebugLoc();
1254  BasicBlock *BB = CB.getParent();
1255 
1256  InlineCost Cost = shouldInlineCandidate(Candidate);
1257  if (Cost.isNever()) {
1258  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
1259  << "incompatible inlining");
1260  return false;
1261  }
1262 
1263  if (!Cost)
1264  return false;
1265 
1266  InlineFunctionInfo IFI(nullptr, GetAC);
1267  IFI.UpdateProfile = false;
1268  if (InlineFunction(CB, IFI).isSuccess()) {
1269  // Merge the attributes based on the inlining.
1271  *CalledFunction);
1272 
1273  // The call to InlineFunction erases I, so we can't pass it here.
1274  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
1275  *BB->getParent(), Cost, true, CSINLINE_DEBUG);
1276 
1277  // Now populate the list of newly exposed call sites.
1278  if (InlinedCallSites) {
1279  InlinedCallSites->clear();
1280  for (auto &I : IFI.InlinedCallSites)
1281  InlinedCallSites->push_back(I);
1282  }
1283 
1284  if (ProfileIsCS)
1285  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1286  ++NumCSInlined;
1287 
1288  // Prorate inlined probes for a duplicated inlining callsite which probably
1289  // has a distribution less than 100%. Samples for an inlinee should be
1290  // distributed among the copies of the original callsite based on each
1291  // callsite's distribution factor for counts accuracy. Note that an inlined
1292  // probe may come with its own distribution factor if it has been duplicated
1293  // in the inlinee body. The two factor are multiplied to reflect the
1294  // aggregation of duplication.
1295  if (Candidate.CallsiteDistribution < 1) {
1296  for (auto &I : IFI.InlinedCallSites) {
1297  if (Optional<PseudoProbe> Probe = extractProbe(*I))
1299  Candidate.CallsiteDistribution);
1300  }
1301  NumDuplicatedInlinesite++;
1302  }
1303 
1304  return true;
1305  }
1306  return false;
1307 }
1308 
1309 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1310  CallBase *CB) {
1311  assert(CB && "Expect non-null call instruction");
1312 
1313  if (isa<IntrinsicInst>(CB))
1314  return false;
1315 
1316  // Find the callee's profile. For indirect call, find hottest target profile.
1317  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1318  // If ExternalInlineAdvisor wants to inline this site, do so even
1319  // if Samples are not present.
1320  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1321  return false;
1322 
1323  float Factor = 1.0;
1324  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1325  Factor = Probe->Factor;
1326 
1327  uint64_t CallsiteCount = 0;
1328  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
1329  if (Weight)
1330  CallsiteCount = Weight.get();
1331  if (CalleeSamples)
1332  CallsiteCount = std::max(
1333  CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
1334 
1335  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1336  return true;
1337 }
1338 
1340 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1341  std::unique_ptr<InlineAdvice> Advice = nullptr;
1342  if (ExternalInlineAdvisor) {
1343  Advice = ExternalInlineAdvisor->getAdvice(CB);
1344  if (Advice) {
1345  if (!Advice->isInliningRecommended()) {
1346  Advice->recordUnattemptedInlining();
1347  return InlineCost::getNever("not previously inlined");
1348  }
1349  Advice->recordInlining();
1350  return InlineCost::getAlways("previously inlined");
1351  }
1352  }
1353 
1354  return {};
1355 }
1356 
1357 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1358  Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1359  return Cost ? !!Cost.getValue() : false;
1360 }
1361 
1362 InlineCost
1363 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1364  if (Optional<InlineCost> ReplayCost =
1365  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1366  return ReplayCost.getValue();
1367  // Adjust threshold based on call site hotness, only do this for callsite
1368  // prioritized inliner because otherwise cost-benefit check is done earlier.
1369  int SampleThreshold = SampleColdCallSiteThreshold;
1371  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1372  SampleThreshold = SampleHotCallSiteThreshold;
1373  else if (!ProfileSizeInline)
1374  return InlineCost::getNever("cold callsite");
1375  }
1376 
1377  Function *Callee = Candidate.CallInstr->getCalledFunction();
1378  assert(Callee && "Expect a definition for inline candidate of direct call");
1379 
1380  InlineParams Params = getInlineParams();
1381  // We will ignore the threshold from inline cost, so always get full cost.
1382  Params.ComputeFullInlineCost = true;
1384  // Checks if there is anything in the reachable portion of the callee at
1385  // this callsite that makes this inlining potentially illegal. Need to
1386  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1387  // when cost exceeds threshold without checking all IRs in the callee.
1388  // The acutal cost does not matter because we only checks isNever() to
1389  // see if it is legal to inline the callsite.
1390  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1391  GetTTI(*Callee), GetAC, GetTLI);
1392 
1393  // Honor always inline and never inline from call analyzer
1394  if (Cost.isNever() || Cost.isAlways())
1395  return Cost;
1396 
1397  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1398  // decisions based on hotness as well as accurate function byte sizes for
1399  // given context using function/inlinee sizes from previous build. It
1400  // stores the decision in profile, and also adjust/merge context profile
1401  // aiming at better context-sensitive post-inline profile quality, assuming
1402  // all inline decision estimates are going to be honored by compiler. Here
1403  // we replay that inline decision under `sample-profile-use-preinliner`.
1404  // Note that we don't need to handle negative decision from preinliner as
1405  // context profile for not inlined calls are merged by preinliner already.
1406  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1407  // Once two node are merged due to promotion, we're losing some context
1408  // so the original context-sensitive preinliner decision should be ignored
1409  // for SyntheticContext.
1410  SampleContext &Context = Candidate.CalleeSamples->getContext();
1411  if (!Context.hasState(SyntheticContext) &&
1412  Context.hasAttribute(ContextShouldBeInlined))
1413  return InlineCost::getAlways("preinliner");
1414  }
1415 
1416  // For old FDO inliner, we inline the call site as long as cost is not
1417  // "Never". The cost-benefit check is done earlier.
1419  return InlineCost::get(Cost.getCost(), INT_MAX);
1420  }
1421 
1422  // Otherwise only use the cost from call analyzer, but overwite threshold with
1423  // Sample PGO threshold.
1424  return InlineCost::get(Cost.getCost(), SampleThreshold);
1425 }
1426 
1427 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1428  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1429  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
1430 
1431  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1432  // Profile symbol list is ignored when profile-sample-accurate is on.
1433  assert((!ProfAccForSymsInList ||
1435  !F.hasFnAttribute("profile-sample-accurate"))) &&
1436  "ProfAccForSymsInList should be false when profile-sample-accurate "
1437  "is enabled");
1438 
1439  // Populating worklist with initial call sites from root inliner, along
1440  // with call site weights.
1441  CandidateQueue CQueue;
1442  InlineCandidate NewCandidate;
1443  for (auto &BB : F) {
1444  for (auto &I : BB.getInstList()) {
1445  auto *CB = dyn_cast<CallBase>(&I);
1446  if (!CB)
1447  continue;
1448  if (getInlineCandidate(&NewCandidate, CB))
1449  CQueue.push(NewCandidate);
1450  }
1451  }
1452 
1453  // Cap the size growth from profile guided inlining. This is needed even
1454  // though cost of each inline candidate already accounts for callee size,
1455  // because with top-down inlining, we can grow inliner size significantly
1456  // with large number of smaller inlinees each pass the cost check.
1458  "Max inline size limit should not be smaller than min inline size "
1459  "limit.");
1460  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1463  if (ExternalInlineAdvisor)
1465 
1466  // Perform iterative BFS call site prioritized inlining
1467  bool Changed = false;
1468  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1469  InlineCandidate Candidate = CQueue.top();
1470  CQueue.pop();
1471  CallBase *I = Candidate.CallInstr;
1472  Function *CalledFunction = I->getCalledFunction();
1473 
1474  if (CalledFunction == &F)
1475  continue;
1476  if (I->isIndirectCall()) {
1477  uint64_t Sum = 0;
1478  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1479  uint64_t SumOrigin = Sum;
1480  Sum *= Candidate.CallsiteDistribution;
1481  unsigned ICPCount = 0;
1482  for (const auto *FS : CalleeSamples) {
1483  // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1484  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1485  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1486  PSI->getOrCompHotCountThreshold());
1487  continue;
1488  }
1489  uint64_t EntryCountDistributed =
1490  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1491  // In addition to regular inline cost check, we also need to make sure
1492  // ICP isn't introducing excessive speculative checks even if individual
1493  // target looks beneficial to promote and inline. That means we should
1494  // only do ICP when there's a small number dominant targets.
1495  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1496  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1497  break;
1498  // TODO: Fix CallAnalyzer to handle all indirect calls.
1499  // For indirect call, we don't run CallAnalyzer to get InlineCost
1500  // before actual inlining. This is because we could see two different
1501  // types from the same definition, which makes CallAnalyzer choke as
1502  // it's expecting matching parameter type on both caller and callee
1503  // side. See example from PR18962 for the triggering cases (the bug was
1504  // fixed, but we generate different types).
1505  if (!PSI->isHotCount(EntryCountDistributed))
1506  break;
1507  SmallVector<CallBase *, 8> InlinedCallSites;
1508  // Attach function profile for promoted indirect callee, and update
1509  // call site count for the promoted inline candidate too.
1510  Candidate = {I, FS, EntryCountDistributed,
1511  Candidate.CallsiteDistribution};
1512  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1513  &InlinedCallSites)) {
1514  for (auto *CB : InlinedCallSites) {
1515  if (getInlineCandidate(&NewCandidate, CB))
1516  CQueue.emplace(NewCandidate);
1517  }
1518  ICPCount++;
1519  Changed = true;
1520  }
1521  }
1522  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1523  !CalledFunction->isDeclaration()) {
1524  SmallVector<CallBase *, 8> InlinedCallSites;
1525  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1526  for (auto *CB : InlinedCallSites) {
1527  if (getInlineCandidate(&NewCandidate, CB))
1528  CQueue.emplace(NewCandidate);
1529  }
1530  Changed = true;
1531  }
1532  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1533  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1534  InlinedGUIDs, SymbolMap,
1535  PSI->getOrCompHotCountThreshold());
1536  }
1537  }
1538 
1539  if (!CQueue.empty()) {
1540  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1541  ++NumCSInlinedHitMaxLimit;
1542  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1543  ++NumCSInlinedHitMinLimit;
1544  else
1545  ++NumCSInlinedHitGrowthLimit;
1546  }
1547 
1548  return Changed;
1549 }
1550 
1551 /// Returns the sorted CallTargetMap \p M by count in descending order.
1555  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1556  R.emplace_back(
1557  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1558  }
1559  return R;
1560 }
1561 
1562 // Generate MD_prof metadata for every branch instruction using the
1563 // edge weights computed during propagation.
1564 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1565  // Generate MD_prof metadata for every branch instruction using the
1566  // edge weights computed during propagation.
1567  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1568  LLVMContext &Ctx = F.getContext();
1569  MDBuilder MDB(Ctx);
1570  for (auto &BI : F) {
1571  BasicBlock *BB = &BI;
1572 
1573  if (BlockWeights[BB]) {
1574  for (auto &I : BB->getInstList()) {
1575  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1576  continue;
1577  if (!cast<CallBase>(I).getCalledFunction()) {
1578  const DebugLoc &DLoc = I.getDebugLoc();
1579  if (!DLoc)
1580  continue;
1581  const DILocation *DIL = DLoc;
1582  const FunctionSamples *FS = findFunctionSamples(I);
1583  if (!FS)
1584  continue;
1586  auto T = FS->findCallTargetMapAt(CallSite);
1587  if (!T || T.get().empty())
1588  continue;
1590  // Prorate the callsite counts based on the pre-ICP distribution
1591  // factor to reflect what is already done to the callsite before
1592  // ICP, such as calliste cloning.
1593  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1594  if (Probe->Factor < 1)
1595  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1596  }
1597  }
1598  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1600  uint64_t Sum = 0;
1601  for (const auto &C : T.get())
1602  Sum += C.second;
1603  // With CSSPGO all indirect call targets are counted torwards the
1604  // original indirect call site in the profile, including both
1605  // inlined and non-inlined targets.
1607  if (const FunctionSamplesMap *M =
1608  FS->findFunctionSamplesMapAt(CallSite)) {
1609  for (const auto &NameFS : *M)
1610  Sum += NameFS.second.getEntrySamples();
1611  }
1612  }
1613  if (Sum)
1614  updateIDTMetaData(I, SortedCallTargets, Sum);
1615  else if (OverwriteExistingWeights)
1616  I.setMetadata(LLVMContext::MD_prof, nullptr);
1617  } else if (!isa<IntrinsicInst>(&I)) {
1618  I.setMetadata(LLVMContext::MD_prof,
1619  MDB.createBranchWeights(
1620  {static_cast<uint32_t>(BlockWeights[BB])}));
1621  }
1622  }
1624  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1625  // clear it for cold code.
1626  for (auto &I : BB->getInstList()) {
1627  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1628  if (cast<CallBase>(I).isIndirectCall())
1629  I.setMetadata(LLVMContext::MD_prof, nullptr);
1630  else
1631  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1632  }
1633  }
1634  }
1635 
1636  Instruction *TI = BB->getTerminator();
1637  if (TI->getNumSuccessors() == 1)
1638  continue;
1639  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1640  !isa<IndirectBrInst>(TI))
1641  continue;
1642 
1643  DebugLoc BranchLoc = TI->getDebugLoc();
1644  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1645  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1646  : Twine("<UNKNOWN LOCATION>"))
1647  << ".\n");
1648  SmallVector<uint32_t, 4> Weights;
1649  uint32_t MaxWeight = 0;
1650  Instruction *MaxDestInst;
1651  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1652  BasicBlock *Succ = TI->getSuccessor(I);
1653  Edge E = std::make_pair(BB, Succ);
1654  uint64_t Weight = EdgeWeights[E];
1655  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1656  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1657  // if needed. Sample counts in profiles are 64-bit unsigned values,
1658  // but internally branch weights are expressed as 32-bit values.
1659  if (Weight > std::numeric_limits<uint32_t>::max()) {
1660  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1662  }
1663  // Weight is added by one to avoid propagation errors introduced by
1664  // 0 weights.
1665  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1666  if (Weight != 0) {
1667  if (Weight > MaxWeight) {
1668  MaxWeight = Weight;
1669  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1670  }
1671  }
1672  }
1673 
1674  uint64_t TempWeight;
1675  // Only set weights if there is at least one non-zero weight.
1676  // In any other case, let the analyzer set weights.
1677  // Do not set weights if the weights are present unless under
1678  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1679  // twice. If the first annotation already set the weights, the second pass
1680  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1681  // weight should have their existing metadata (possibly annotated by LTO
1682  // prelink) cleared.
1683  if (MaxWeight > 0 &&
1684  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1685  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1686  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1687  ORE->emit([&]() {
1688  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1689  << "most popular destination for conditional branches at "
1690  << ore::NV("CondBranchesLoc", BranchLoc);
1691  });
1692  } else {
1694  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1695  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1696  } else {
1697  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1698  }
1699  }
1700  }
1701 }
1702 
1703 /// Once all the branch weights are computed, we emit the MD_prof
1704 /// metadata on BB using the computed values for each of its branches.
1705 ///
1706 /// \param F The function to query.
1707 ///
1708 /// \returns true if \p F was modified. Returns false, otherwise.
1709 bool SampleProfileLoader::emitAnnotations(Function &F) {
1710  bool Changed = false;
1711 
1713  if (!ProbeManager->profileIsValid(F, *Samples)) {
1714  LLVM_DEBUG(
1715  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1716  << F.getName());
1717  ++NumMismatchedProfile;
1718  return false;
1719  }
1720  ++NumMatchedProfile;
1721  } else {
1722  if (getFunctionLoc(F) == 0)
1723  return false;
1724 
1725  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1726  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1727  }
1728 
1729  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1730  if (ProfileIsCS && CallsitePrioritizedInline)
1731  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1732  else
1733  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1734 
1735  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1736 
1737  if (Changed)
1738  generateMDProfMetadata(F);
1739 
1740  emitCoverageRemarks(F);
1741  return Changed;
1742 }
1743 
1745 
1746 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1747  "Sample Profile loader", false, false)
1752 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1754 
1755 std::unique_ptr<ProfiledCallGraph>
1756 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1757  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1758  if (ProfileIsCS)
1759  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1760  else
1761  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1762 
1763  // Add all functions into the profiled call graph even if they are not in
1764  // the profile. This makes sure functions missing from the profile still
1765  // gets a chance to be processed.
1766  for (auto &Node : CG) {
1767  const auto *F = Node.first;
1768  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1769  continue;
1770  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1771  }
1772 
1773  return ProfiledCG;
1774 }
1775 
1776 std::vector<Function *>
1777 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1778  std::vector<Function *> FunctionOrderList;
1779  FunctionOrderList.reserve(M.size());
1780 
1782  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1783  "together with -sample-profile-top-down-load.\n";
1784 
1785  if (!ProfileTopDownLoad || CG == nullptr) {
1786  if (ProfileMergeInlinee) {
1787  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1788  // because the profile for a function may be used for the profile
1789  // annotation of its outline copy before the profile merging of its
1790  // non-inlined inline instances, and that is not the way how
1791  // ProfileMergeInlinee is supposed to work.
1792  ProfileMergeInlinee = false;
1793  }
1794 
1795  for (Function &F : M)
1796  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1797  FunctionOrderList.push_back(&F);
1798  return FunctionOrderList;
1799  }
1800 
1801  assert(&CG->getModule() == &M);
1802 
1803  if (UseProfiledCallGraph ||
1804  (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) {
1805  // Use profiled call edges to augment the top-down order. There are cases
1806  // that the top-down order computed based on the static call graph doesn't
1807  // reflect real execution order. For example
1808  //
1809  // 1. Incomplete static call graph due to unknown indirect call targets.
1810  // Adjusting the order by considering indirect call edges from the
1811  // profile can enable the inlining of indirect call targets by allowing
1812  // the caller processed before them.
1813  // 2. Mutual call edges in an SCC. The static processing order computed for
1814  // an SCC may not reflect the call contexts in the context-sensitive
1815  // profile, thus may cause potential inlining to be overlooked. The
1816  // function order in one SCC is being adjusted to a top-down order based
1817  // on the profile to favor more inlining. This is only a problem with CS
1818  // profile.
1819  // 3. Transitive indirect call edges due to inlining. When a callee function
1820  // (say B) is inlined into into a caller function (say A) in LTO prelink,
1821  // every call edge originated from the callee B will be transferred to
1822  // the caller A. If any transferred edge (say A->C) is indirect, the
1823  // original profiled indirect edge B->C, even if considered, would not
1824  // enforce a top-down order from the caller A to the potential indirect
1825  // call target C in LTO postlink since the inlined callee B is gone from
1826  // the static call graph.
1827  // 4. #3 can happen even for direct call targets, due to functions defined
1828  // in header files. A header function (say A), when included into source
1829  // files, is defined multiple times but only one definition survives due
1830  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1831  // definitions can be useless based on a local file scope. More
1832  // importantly, the inlinee (say B), once fully inlined to a
1833  // to-be-dropped A, will have no profile to consume when its outlined
1834  // version is compiled. This can lead to a profile-less prelink
1835  // compilation for the outlined version of B which may be called from
1836  // external modules. while this isn't easy to fix, we rely on the
1837  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1838  // the A can be inlined in its local scope in prelink, it may not exist
1839  // in the merged IR in postlink, and we'll need the profiled call edges
1840  // to enforce a top-down order for the rest of the functions.
1841  //
1842  // Considering those cases, a profiled call graph completely independent of
1843  // the static call graph is constructed based on profile data, where
1844  // function objects are not even needed to handle case #3 and case 4.
1845  //
1846  // Note that static callgraph edges are completely ignored since they
1847  // can be conflicting with profiled edges for cyclic SCCs and may result in
1848  // an SCC order incompatible with profile-defined one. Using strictly
1849  // profile order ensures a maximum inlining experience. On the other hand,
1850  // static call edges are not so important when they don't correspond to a
1851  // context in the profile.
1852 
1853  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1854  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1855  while (!CGI.isAtEnd()) {
1856  for (ProfiledCallGraphNode *Node : *CGI) {
1857  Function *F = SymbolMap.lookup(Node->Name);
1858  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1859  FunctionOrderList.push_back(F);
1860  }
1861  ++CGI;
1862  }
1863  } else {
1865  while (!CGI.isAtEnd()) {
1866  for (CallGraphNode *Node : *CGI) {
1867  auto *F = Node->getFunction();
1868  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1869  FunctionOrderList.push_back(F);
1870  }
1871  ++CGI;
1872  }
1873  }
1874 
1875  LLVM_DEBUG({
1876  dbgs() << "Function processing order:\n";
1877  for (auto F : reverse(FunctionOrderList)) {
1878  dbgs() << F->getName() << "\n";
1879  }
1880  });
1881 
1882  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1883  return FunctionOrderList;
1884 }
1885 
1886 bool SampleProfileLoader::doInitialization(Module &M,
1888  auto &Ctx = M.getContext();
1889 
1890  auto ReaderOrErr = SampleProfileReader::create(
1891  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1892  if (std::error_code EC = ReaderOrErr.getError()) {
1893  std::string Msg = "Could not open profile: " + EC.message();
1894  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1895  return false;
1896  }
1897  Reader = std::move(ReaderOrErr.get());
1899  // set module before reading the profile so reader may be able to only
1900  // read the function profiles which are used by the current module.
1901  Reader->setModule(&M);
1902  if (std::error_code EC = Reader->read()) {
1903  std::string Msg = "profile reading failed: " + EC.message();
1904  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1905  return false;
1906  }
1907 
1908  PSL = Reader->getProfileSymbolList();
1909 
1910  // While profile-sample-accurate is on, ignore symbol list.
1911  ProfAccForSymsInList =
1913  if (ProfAccForSymsInList) {
1914  NamesInProfile.clear();
1915  if (auto NameTable = Reader->getNameTable())
1916  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1917  CoverageTracker.setProfAccForSymsInList(true);
1918  }
1919 
1920  if (FAM && !ProfileInlineReplayFile.empty()) {
1921  ExternalInlineAdvisor = getReplayInlineAdvisor(
1922  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
1927  /*EmitRemarks=*/false);
1928  }
1929 
1930  // Apply tweaks if context-sensitive profile is available.
1931  if (Reader->profileIsCS()) {
1932  ProfileIsCS = true;
1934 
1935  // Enable priority-base inliner and size inline by default for CSSPGO.
1937  ProfileSizeInline = true;
1940 
1941  // For CSSPGO, use preinliner decision by default when available.
1943  UsePreInlinerDecision = true;
1944 
1945  // For CSSPGO, we also allow recursive inline to best use context profile.
1947  AllowRecursiveInline = true;
1948 
1949  // Enable iterative-BFI by default for CSSPGO.
1951  UseIterativeBFIInference = true;
1952 
1953  // Tracker for profiles under different context
1954  ContextTracker = std::make_unique<SampleContextTracker>(
1955  Reader->getProfiles(), &GUIDToFuncNameMap);
1956  }
1957 
1958  // Load pseudo probe descriptors for probe-based function samples.
1959  if (Reader->profileIsProbeBased()) {
1960  ProbeManager = std::make_unique<PseudoProbeManager>(M);
1961  if (!ProbeManager->moduleIsProbed(M)) {
1962  const char *Msg =
1963  "Pseudo-probe-based profile requires SampleProfileProbePass";
1964  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1965  return false;
1966  }
1967  }
1968 
1969  return true;
1970 }
1971 
1973  return new SampleProfileLoaderLegacyPass();
1974 }
1975 
1977  return new SampleProfileLoaderLegacyPass(Name);
1978 }
1979 
1980 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1981  ProfileSummaryInfo *_PSI, CallGraph *CG) {
1982  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1983 
1984  PSI = _PSI;
1985  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
1986  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1988  PSI->refresh();
1989  }
1990  // Compute the total number of samples collected in this profile.
1991  for (const auto &I : Reader->getProfiles())
1992  TotalCollectedSamples += I.second.getTotalSamples();
1993 
1994  auto Remapper = Reader->getRemapper();
1995  // Populate the symbol map.
1996  for (const auto &N_F : M.getValueSymbolTable()) {
1997  StringRef OrigName = N_F.getKey();
1998  Function *F = dyn_cast<Function>(N_F.getValue());
1999  if (F == nullptr || OrigName.empty())
2000  continue;
2001  SymbolMap[OrigName] = F;
2003  if (OrigName != NewName && !NewName.empty()) {
2004  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2005  // Failiing to insert means there is already an entry in SymbolMap,
2006  // thus there are multiple functions that are mapped to the same
2007  // stripped name. In this case of name conflicting, set the value
2008  // to nullptr to avoid confusion.
2009  if (!r.second)
2010  r.first->second = nullptr;
2011  OrigName = NewName;
2012  }
2013  // Insert the remapped names into SymbolMap.
2014  if (Remapper) {
2015  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2016  if (*MapName != OrigName && !MapName->empty())
2017  SymbolMap.insert(std::make_pair(*MapName, F));
2018  }
2019  }
2020  }
2021  assert(SymbolMap.count(StringRef()) == 0 &&
2022  "No empty StringRef should be added in SymbolMap");
2023 
2024  bool retval = false;
2025  for (auto F : buildFunctionOrder(M, CG)) {
2026  assert(!F->isDeclaration());
2027  clearFunctionData();
2028  retval |= runOnFunction(*F, AM);
2029  }
2030 
2031  // Account for cold calls not inlined....
2032  if (!ProfileIsCS)
2033  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2034  notInlinedCallInfo)
2035  updateProfileCallee(pair.first, pair.second.entryCount);
2036 
2037  return retval;
2038 }
2039 
2040 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
2041  ACT = &getAnalysis<AssumptionCacheTracker>();
2042  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2043  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2044  ProfileSummaryInfo *PSI =
2045  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2046  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
2047 }
2048 
2050  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2051  DILocation2SampleMap.clear();
2052  // By default the entry count is initialized to -1, which will be treated
2053  // conservatively by getEntryCount as the same as unknown (None). This is
2054  // to avoid newly added code to be treated as cold. If we have samples
2055  // this will be overwritten in emitAnnotations.
2056  uint64_t initialEntryCount = -1;
2057 
2058  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2059  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2060  // initialize all the function entry counts to 0. It means all the
2061  // functions without profile will be regarded as cold.
2062  initialEntryCount = 0;
2063  // profile-sample-accurate is a user assertion which has a higher precedence
2064  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2065  ProfAccForSymsInList = false;
2066  }
2067  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2068 
2069  // PSL -- profile symbol list include all the symbols in sampled binary.
2070  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2071  // old functions without samples being cold, without having to worry
2072  // about new and hot functions being mistakenly treated as cold.
2073  if (ProfAccForSymsInList) {
2074  // Initialize the entry count to 0 for functions in the list.
2075  if (PSL->contains(F.getName()))
2076  initialEntryCount = 0;
2077 
2078  // Function in the symbol list but without sample will be regarded as
2079  // cold. To minimize the potential negative performance impact it could
2080  // have, we want to be a little conservative here saying if a function
2081  // shows up in the profile, no matter as outline function, inline instance
2082  // or call targets, treat the function as not being cold. This will handle
2083  // the cases such as most callsites of a function are inlined in sampled
2084  // binary but not inlined in current build (because of source code drift,
2085  // imprecise debug information, or the callsites are all cold individually
2086  // but not cold accumulatively...), so the outline function showing up as
2087  // cold in sampled binary will actually not be cold after current build.
2089  if (NamesInProfile.count(CanonName))
2090  initialEntryCount = -1;
2091  }
2092 
2093  // Initialize entry count when the function has no existing entry
2094  // count value.
2095  if (!F.getEntryCount().hasValue())
2096  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
2097  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2098  if (AM) {
2099  auto &FAM =
2101  .getManager();
2103  } else {
2104  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2105  ORE = OwnedORE.get();
2106  }
2107 
2108  if (ProfileIsCS)
2109  Samples = ContextTracker->getBaseSamplesFor(F);
2110  else
2111  Samples = Reader->getSamplesFor(F);
2112 
2113  if (Samples && !Samples->empty())
2114  return emitAnnotations(F);
2115  return false;
2116 }
2117 
2119  ModuleAnalysisManager &AM) {
2122 
2123  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2124  return FAM.getResult<AssumptionAnalysis>(F);
2125  };
2126  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2127  return FAM.getResult<TargetIRAnalysis>(F);
2128  };
2129  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2131  };
2132 
2133  SampleProfileLoader SampleLoader(
2134  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2135  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2136  : ProfileRemappingFileName,
2137  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2138 
2139  if (!SampleLoader.doInitialization(M, &FAM))
2140  return PreservedAnalyses::all();
2141 
2144  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2145  return PreservedAnalyses::all();
2146 
2147  return PreservedAnalyses::none();
2148 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:124
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:842
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
Definition: ReplayInlineAdvisor.cpp:79
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:130
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2372
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2118
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:152
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1028
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1030
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
DebugInfoMetadata.h
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:474
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:558
llvm::Function
Definition: Function.h:62
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:106
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:77
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1541
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
Definition: InlineAdvisor.cpp:494
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:1972
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:412
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:832
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
llvm::X86AS::FS
@ FS
Definition: X86.h:188
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:112
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:185
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:81
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:903
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:933
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:680
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:125
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:497
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:212
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:75
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Allow sample loader inliner to inline recursive calls."))
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:771
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:936
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1430
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:228
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:477
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2788
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:373
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:81
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use the preinliner decisions stored in profile context."))
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:40
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:403
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:82
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta, then scaling callsite information by the new count divided by the old count.
Definition: InlineFunction.cpp:1610
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:142
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1054
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:105
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:250
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:783
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:107
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3072
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:485
llvm::sampleprof::SampleContext
Definition: SampleProf.h:469
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings", which are basically ranges of bytes.
Definition: StringMap.h:108
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:228
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:804
ValueSymbolTable.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:118
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
SampleProfile.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:422
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:697
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
ProfileSummaryInfo.h
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2428
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:818
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:398
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:42
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:688
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:714
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
PriorityQueue.h
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:377
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ReplayInlinerSettings::Scope::Module
@ Module
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1639
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1039
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:107
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1731
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::sampleprof::ProfiledCallGraphNode
Definition: ProfiledCallGraph.h:27
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:116
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:493
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:286
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::CallSiteFormat::Format::Line
@ Line
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:479
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
CallPromotionUtils.h
Profile
Load MIR Sample Profile
Definition: MIRSampleProfile.cpp:62
llvm::ContextTrieNode
Definition: SampleContextTracker.h:36
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
CallGraphSCCPass.h
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:653
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst with the value profile meta data.
Definition: InstrProf.cpp:988
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:39
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:466
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:441
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:83
profile
sample profile
Definition: SampleProfile.cpp:1752
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:408
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
GenericDomTree.h
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:517
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:56
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1522
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:201
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:345
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:863
llvm::cl::value_desc
Definition: CommandLine.h:422
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and shouldn't be promoted again.
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:499
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:102
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1032
llvm::BasicBlock::getFirstNonPHIOrDbgOrLifetime
const Instruction * getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic, or a lifetime intrinsic, or null if no such instruction exists.
Definition: BasicBlock.cpp:237
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:304
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:108
CallGraph.h
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:25
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:944
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1753
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:368
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1553
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: APFloat.h:43
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1751
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition: InstrTypes.h:1176
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR unit.
Definition: PassManager.h:940
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1487
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1033
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:2029
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:218
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:45
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:795
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773