LLVM  16.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/MapVector.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 #include "llvm/ADT/SCCIterator.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/DebugLoc.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
59 #include "llvm/IR/PseudoProbe.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
66 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <cstdint>
84 #include <functional>
85 #include <limits>
86 #include <map>
87 #include <memory>
88 #include <queue>
89 #include <string>
90 #include <system_error>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 using namespace sampleprof;
96 using namespace llvm::sampleprofutil;
98 #define DEBUG_TYPE "sample-profile"
99 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100 
// Statistic counters for this pass. Each STATISTIC names a counter and gives
// the human-readable description that accompanies it. This first group covers
// context-sensitive inlining outcomes, CFG/profile matching, and duplicated
// inline sites.
101 STATISTIC(NumCSInlined,
102  "Number of functions inlined with context sensitive profile");
103 STATISTIC(NumCSNotInlined,
104  "Number of functions not inlined with context sensitive profile");
105 STATISTIC(NumMismatchedProfile,
106  "Number of functions with CFG mismatched profile");
107 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
108 STATISTIC(NumDuplicatedInlinesite,
109  "Number of inlined callsites with a partial distribution factor");
110 
// One counter per size limit (min / max / growth) that caused FDO inlining to
// stop for a function.
111 STATISTIC(NumCSInlinedHitMinLimit,
112  "Number of functions with FDO inline stopped due to min size limit");
113 STATISTIC(NumCSInlinedHitMaxLimit,
114  "Number of functions with FDO inline stopped due to max size limit");
115 STATISTIC(
116  NumCSInlinedHitGrowthLimit,
117  "Number of functions with FDO inline stopped due to growth size limit");
118 
119 // Command line option to specify the file to read samples from. This is
120 // mainly used for debugging.
122  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
123  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
124 
125 // The named file contains a set of transformations that may have been applied
126 // to the symbol names between the program from which the sample data was
127 // collected and the current program's symbols.
129  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
130  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
131 
133  "profile-sample-accurate", cl::Hidden, cl::init(false),
134  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
135  "callsite and function as having 0 samples. Otherwise, treat "
136  "un-sampled callsites and functions conservatively as unknown. "));
137 
139  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
140  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
141  "branches and calls as having 0 samples. Otherwise, treat "
142  "them conservatively as unknown. "));
143 
145  "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
146  cl::desc("For symbols in profile symbol list, regard their profiles to "
147  "be accurate. It may be overriden by profile-sample-accurate. "));
148 
150  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
151  cl::desc("Merge past inlinee's profile to outline version if sample "
152  "profile loader decided not to inline a call site. It will "
153  "only be enabled when top-down order of profile loading is "
154  "enabled. "));
155 
157  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
158  cl::desc("Do profile annotation and inlining for functions in top-down "
159  "order of call graph during sample profile loading. It only "
160  "works for new pass manager. "));
161 
// Hidden boolean option, on by default. Only meaningful together with
// -sample-profile-top-down-load: it selects the profiled call graph as the
// source of the top-down function processing order.
162 static cl::opt<bool>
163  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
164  cl::desc("Process functions in a top-down order "
165  "defined by the profiled call graph when "
166  "-sample-profile-top-down-load is on."));
168  SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
169  cl::desc("Sort profiled recursion by edge weights."));
170 
172  "sample-profile-inline-size", cl::Hidden, cl::init(false),
173  cl::desc("Inline cold call sites in profile loader if it's beneficial "
174  "for code size."));
175 
176 // Since profiles are consumed by many passes, turning on this option has
177 // side effects. For instance, pre-link SCC inliner would see merged profiles
178 // and inline the hot functions (that are skipped in this pass).
180  "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
181  cl::desc("If true, artifically skip inline transformation in sample-loader "
182  "pass, and merge (or scale) profiles (as configured by "
183  "--sample-profile-merge-inlinee)."));
184 
186  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
187  cl::desc("The size growth ratio limit for proirity-based sample profile "
188  "loader inlining."));
189 
191  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
192  cl::desc("The lower bound of size growth limit for "
193  "proirity-based sample profile loader inlining."));
194 
196  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
197  cl::desc("The upper bound of size growth limit for "
198  "proirity-based sample profile loader inlining."));
199 
201  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
202  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
203  "inlining."));
204 
206  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
207  cl::desc("Threshold for inlining cold callsites"));
208 
210  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
211  cl::desc(
212  "Relative hotness percentage threshold for indirect "
213  "call promotion in proirity-based sample profile loader inlining."));
214 
216  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
217  cl::desc(
218  "Skip relative hotness check for ICP up to given number of targets."));
219 
221  "sample-profile-prioritized-inline", cl::Hidden,
222 
223  cl::desc("Use call site prioritized inlining for sample profile loader."
224  "Currently only CSSPGO is supported."));
225 
227  "sample-profile-use-preinliner", cl::Hidden,
228 
229  cl::desc("Use the preinliner decisions stored in profile context."));
230 
232  "sample-profile-recursive-inline", cl::Hidden,
233 
234  cl::desc("Allow sample loader inliner to inline recursive calls."));
235 
237  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
238  cl::desc(
239  "Optimization remarks file containing inline remarks to be replayed "
240  "by inlining from sample profile loader."),
241  cl::Hidden);
242 
244  "sample-profile-inline-replay-scope",
247  "Replay on functions that have remarks associated "
248  "with them (default)"),
250  "Replay on the entire module")),
251  cl::desc("Whether inline replay should be applied to the entire "
252  "Module or just the Functions (default) that are present as "
253  "callers in remarks during sample profile inlining."),
254  cl::Hidden);
255 
257  "sample-profile-inline-replay-fallback",
259  cl::values(
260  clEnumValN(
262  "All decisions not in replay send to original advisor (default)"),
264  "AlwaysInline", "All decisions not in replay are inlined"),
266  "All decisions not in replay are not inlined")),
267  cl::desc("How sample profile inline replay treats sites that don't come "
268  "from the replay. Original: defers to original advisor, "
269  "AlwaysInline: inline all sites not in replay, NeverInline: "
270  "inline no sites not in replay"),
271  cl::Hidden);
272 
274  "sample-profile-inline-replay-format",
276  cl::values(
277  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
279  "<Line Number>:<Column Number>"),
281  "LineDiscriminator", "<Line Number>.<Discriminator>"),
283  "LineColumnDiscriminator",
284  "<Line Number>:<Column Number>.<Discriminator> (default)")),
285  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
286 
// Hidden unsigned option, default 3: the maximum number of promotions performed
// for one indirect callsite. The indirect-call-promotion helpers later in this
// file also use it as the length of the value-profile buffers they allocate,
// so a value of 0 needs explicit handling there.
287 static cl::opt<unsigned>
288  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
289  cl::desc("Max number of promotions for a single indirect "
290  "call callsite in sample profile loader"));
291 
293  "overwrite-existing-weights", cl::Hidden, cl::init(false),
294  cl::desc("Ignore existing branch weights on IR and always overwrite."));
295 
297  "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
298  cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
299  "sample-profile inline pass name."));
300 
302 
303 namespace {
304 
// Container aliases used by the loader.
// BlockWeightMap: basic block -> 64-bit value (a weight, going by the name).
305 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// EquivalenceClassMap: basic block -> another basic block (presumably the
// representative of its equivalence class -- confirm against the users).
306 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// Edge: an ordered pair of basic blocks.
307 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// EdgeWeightMap: Edge -> 64-bit value (a weight, going by the name).
308 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
309 using BlockEdgeMap =
311 
// Scoped helper that keeps a GUID -> function-name map alive for the duration
// of its own lifetime.
//
// Construction (only when the reader reports useMD5()): fills the caller-owned
// map with an entry for every function in the module and points every
// FunctionSamples reachable from the reader's profiles at that map.
// Destruction (same condition): clears the map and resets the pointer in every
// FunctionSamples to nullptr.
//
// The class stores references to the module, the reader and the map; all three
// must outlive the mapper.
312 class GUIDToFuncNameMapper {
313 public:
  // \param M                 Module whose functions seed the map.
  // \param Reader            Profile reader; its profiles get the map attached.
  // \param GUIDToFuncNameMap Caller-owned map that is filled here and cleared
  //                          again in the destructor.
314  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
315  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
316  : CurrentReader(Reader), CurrentModule(M),
317  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
  // Nothing to do unless the reader uses MD5 names.
318  if (!CurrentReader.useMD5())
319  return;
320 
321  for (const auto &F : CurrentModule) {
322  StringRef OrigName = F.getName();
  // insert() does not overwrite, so the first name seen for a GUID wins.
323  CurrentGUIDToFuncNameMap.insert(
324  {Function::getGUID(OrigName), OrigName});
325 
326  // Local to global var promotion used by optimization like thinlto
327  // will rename the var and add suffix like ".llvm.xxx" to the
328  // original local name. In sample profile, the suffixes of function
329  // names are all stripped. Since it is possible that the mapper is
330  // built in post-thin-link phase and var promotion has been done,
331  // we need to add the substring of function name without the suffix
332  // into the GUIDToFuncNameMap.
  // NOTE(review): the declaration of CanonName is not visible in this
  // listing; presumably it is the suffix-stripped form of the function's
  // name -- confirm against the full source.
334  if (CanonName != OrigName)
335  CurrentGUIDToFuncNameMap.insert(
336  {Function::getGUID(CanonName), CanonName});
337  }
338 
339  // Update GUIDToFuncNameMap for each function including inlinees.
340  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
341  }
342 
  // Undoes the constructor: empties the map and detaches it from every
  // FunctionSamples. Skipped under the same useMD5() condition.
343  ~GUIDToFuncNameMapper() {
344  if (!CurrentReader.useMD5())
345  return;
346 
347  CurrentGUIDToFuncNameMap.clear();
348 
349  // Reset GUIDToFuncNameMap for each function as they're no
350  // longer valid at this point.
351  SetGUIDToFuncNameMapForAll(nullptr);
352  }
353 
354 private:
  // Sets FunctionSamples::GUIDToFuncNameMap to \p Map (may be nullptr) on every
  // top-level profile of the reader and, transitively, on every nested
  // callsite sample. Uses a FIFO work queue instead of recursion.
355  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
356  std::queue<FunctionSamples *> FSToUpdate;
  // Seed the queue with the top-level profiles.
357  for (auto &IFS : CurrentReader.getProfiles()) {
358  FSToUpdate.push(&IFS.second);
359  }
360 
361  while (!FSToUpdate.empty()) {
362  FunctionSamples *FS = FSToUpdate.front();
363  FSToUpdate.pop();
364  FS->GUIDToFuncNameMap = Map;
  // Enqueue every nested callsite profile of the one just updated.
365  for (const auto &ICS : FS->getCallsiteSamples()) {
366  const FunctionSamplesMap &FSMap = ICS.second;
367  for (const auto &IFS : FSMap) {
  // The nested samples are reached through const references, so constness
  // is cast away to obtain a mutable pointer for the queue. Note that this
  // reference named FS shadows the outer pointer FS inside the loop body.
368  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
369  FSToUpdate.push(&FS);
370  }
371  }
372  }
373  }
374 
  // Non-owning references captured at construction.
375  SampleProfileReader &CurrentReader;
376  Module &CurrentModule;
377  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
378 };
379 
380 // Inline candidate used by iterative callsite prioritized inliner
381 struct InlineCandidate {
382  CallBase *CallInstr;
383  const FunctionSamples *CalleeSamples;
384  // Prorated callsite count, which will be used to guide inlining. For example,
385  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
386  // copies will get their own distribution factors and their prorated counts
387  // will be used to decide if they should be inlined independently.
388  uint64_t CallsiteCount;
389  // Call site distribution factor to prorate the profile samples for a
390  // duplicated callsite. Default value is 1.0.
391  float CallsiteDistribution;
392 };
393 
394 // Inline candidate comparer using call site weight
395 struct CandidateComparer {
396  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
397  if (LHS.CallsiteCount != RHS.CallsiteCount)
398  return LHS.CallsiteCount < RHS.CallsiteCount;
399 
400  const FunctionSamples *LCS = LHS.CalleeSamples;
401  const FunctionSamples *RCS = RHS.CalleeSamples;
402  assert(LCS && RCS && "Expect non-null FunctionSamples");
403 
404  // Tie breaker using number of samples try to favor smaller functions first
405  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
406  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
407 
408  // Tie breaker using GUID so we have stable/deterministic inlining order
409  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
410  }
411 };
412 
413 using CandidateQueue =
415  CandidateComparer>;
416 
417 /// Sample profile pass.
418 ///
419 /// This pass reads profile data from the file specified by
420 /// -sample-profile-file and annotates every affected function with the
421 /// profile information found in that file.
// Loader implementation on top of SampleProfileLoaderBaseImpl<BasicBlock>.
// It declares the module-level entry points (doInitialization, runOnModule),
// the weight/sample lookup overrides, and the inlining and indirect-call
// promotion helpers whose definitions follow the class.
// NOTE(review): several member declarations and initializer lines are not
// visible in this listing (the embedded numbering has gaps), so the comments
// below describe only what is shown.
422 class SampleProfileLoader final
423  : public SampleProfileLoaderBaseImpl<BasicBlock> {
424 public:
  // Takes the profile file name, the remapping file name, the LTO phase, and
  // three per-function analysis getters. The getters are moved into members;
  // LTOPhase is copied.
425  SampleProfileLoader(
426  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
427  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
428  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
429  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
431  GetAC(std::move(GetAssumptionCache)),
432  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
433  LTOPhase(LTOPhase),
  // The annotated pass name depends on AnnotateSampleProfileInlinePhase; the
  // fallback when the flag is off is CSINLINE_DEBUG.
434  AnnotatedPassName(AnnotateSampleProfileInlinePhase
437  : CSINLINE_DEBUG) {}
438 
439  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
440  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
441  ProfileSummaryInfo *_PSI, CallGraph *CG);
442 
443 protected:
445  bool emitAnnotations(Function &F);
  // Weight lookup for a single instruction; see the definitions below the
  // class for the exact rules.
446  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
447  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
  // Sample lookup helpers for calls and for arbitrary instructions.
448  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
449  const FunctionSamples *
450  findFunctionSamples(const Instruction &I) const override;
451  std::vector<const FunctionSamples *>
452  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
453  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
454  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
456  uint64_t Threshold);
457  // Attempt to promote indirect call and also inline the promoted call
458  bool tryPromoteAndInlineCandidate(
459  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
460  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
461 
462  bool inlineHotFunctions(Function &F,
463  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
464  Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
465  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
466  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
467  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
468  bool
469  tryInlineCandidate(InlineCandidate &Candidate,
470  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
471  bool
472  inlineHotFunctionsWithPriority(Function &F,
473  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
474  // Inline cold/small functions in addition to hot ones
475  bool shouldInlineColdCallee(CallBase &CallInst);
476  void emitOptimizationRemarksForInlineCandidates(
477  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
478  bool Hot);
479  void promoteMergeNotInlinedContextSamples(
481  const Function &F);
482  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
483  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
484  void generateMDProfMetadata(Function &F);
485 
486  /// Map from function name to Function *. Used to find the function from
487  /// the function name. If the function name contains suffix, additional
488  /// entry is added to map from the stripped name to the function if there
489  /// is one-to-one mapping.
491 
  // Getter for TargetLibraryInfo, moved in by the constructor.
494  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
495 
496  /// Profile tracker for different context.
497  std::unique_ptr<SampleContextTracker> ContextTracker;
498 
499  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
500  ///
501  /// We need to know the LTO phase because for example in ThinLTOPrelink
502  /// phase, in annotation, we should not promote indirect calls. Instead,
503  /// we will mark GUIDs that needs to be annotated to the function.
504  const ThinOrFullLTOPhase LTOPhase;
  // Pass name used for remarks; returned by getAnnotatedRemarkPassName().
505  const std::string AnnotatedPassName;
506 
507  /// Profile Symbol list tells whether a function name appears in the binary
508  /// used to generate the current profile.
509  std::unique_ptr<ProfileSymbolList> PSL;
510 
511  /// Total number of samples collected in this profile.
512  ///
513  /// This is the sum of all the samples collected in all the functions executed
514  /// at runtime.
515  uint64_t TotalCollectedSamples = 0;
516 
517  // Information recorded when we declined to inline a call site
518  // because we have determined it is too cold is accumulated for
519  // each callee function. Initially this is just the entry count.
520  struct NotInlinedProfileInfo {
521  uint64_t entryCount;
522  };
524 
525  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
526  // all the function symbols defined or declared in current module.
527  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
528 
529  // All the Names used in FunctionSamples including outline function
530  // names, inline instance names and call target names.
531  StringSet<> NamesInProfile;
532 
533  // For symbol in profile symbol list, whether to regard their profiles
534  // to be accurate. It is mainly decided by existence of profile symbol
535  // list and -profile-accurate-for-symsinlist flag, but it can be
536  // overridden by -profile-sample-accurate or profile-sample-accurate
537  // attribute.
  // Note: unlike TotalCollectedSamples, this flag has no in-class initializer.
538  bool ProfAccForSymsInList;
539 
540  // External inline advisor used to replay inline decision from remarks.
541  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
542 
543  // A pseudo probe helper to correlate the imported sample counts.
544  std::unique_ptr<PseudoProbeManager> ProbeManager;
545 
546 private:
  // Returns a pointer into AnnotatedPassName; valid for the lifetime of this
  // object because the string member is const.
547  const char *getAnnotatedRemarkPassName() const {
548  return AnnotatedPassName.c_str();
549  }
550 };
551 } // end anonymous namespace
552 
// Returns the sample weight to attribute to \p Inst.
//
// Outcomes visible in this function:
//   - the result of getProbeWeight(Inst) on the probe path;
//   - an empty std::error_code ("no weight") when Inst has no debug location
//     or is a branch, an intrinsic call, or a PHI node;
//   - 0 for a direct call for which findCalleeFunctionSamples() finds samples;
//   - otherwise whatever getInstWeightImpl(Inst) returns.
// NOTE(review): the conditions guarding the probe path and the direct-call
// check are not visible in this listing; per the comments they depend on
// whether the profile is probe-based and on whether it is context-sensitive
// -- confirm against the full source.
553 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
555  return getProbeWeight(Inst);
556 
  // Without a debug location there is nothing to look up.
557  const DebugLoc &DLoc = Inst.getDebugLoc();
558  if (!DLoc)
559  return std::error_code();
560 
561  // Ignore all intrinsics, phinodes and branch instructions.
562  // Branch and phinodes instruction usually contains debug info from sources
563  // outside of the residing basic block, thus we ignore them during annotation.
564  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
565  return std::error_code();
566 
567  // For non-CS profile, if a direct call/invoke instruction is inlined in
568  // profile (findCalleeFunctionSamples returns non-empty result), but not
569  // inlined here, it means that the inlined callsite has no sample, thus the
570  // call instruction should have 0 count.
571  // For CS profile, the callsite count of previously inlined callees is
572  // populated with the entry count of the callees.
574  if (const auto *CB = dyn_cast<CallBase>(&Inst))
575  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
576  return 0;
577 
  // Default path: defer to the shared implementation.
578  return getInstWeightImpl(Inst);
579 }
580 
581 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
582 // of non-probe instruction. So if all instructions of the BB give error_code,
583 // tell the inference algorithm to infer the BB weight.
// Probe-based weight lookup for \p Inst.
//
// Returns an empty std::error_code when Inst carries no probe, 0 when no
// FunctionSamples is found for Inst or when Inst is a direct call that has
// callee samples, the factor-scaled sample count when the probe id has a
// record, and otherwise the (failed) lookup result itself.
// NOTE(review): the opening of the assertion at the top and the condition
// guarding the direct-call check are not visible in this listing.
584 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
586  "Profile is not pseudo probe based");
587  Optional<PseudoProbe> Probe = extractProbe(Inst);
588  // Ignore the non-probe instruction. If none of the instruction in the BB is
589  // probe, we choose to infer the BB's weight.
590  if (!Probe)
591  return std::error_code();
592 
593  const FunctionSamples *FS = findFunctionSamples(Inst);
594  // If none of the instruction has FunctionSample, we choose to return zero
595  // value sample to indicate the BB is cold. This could happen when the
596  // instruction is from inlinee and no profile data is found.
597  // FIXME: This should not be affected by the source drift issue as 1) if the
598  // newly added function is top-level inliner, it won't match the CFG checksum
599  // in the function profile or 2) if it's the inlinee, the inlinee should have
600  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
601  // we can improve it by adding a switch for profile-sample-block-accurate for
602  // block level counts in the future.
603  if (!FS)
604  return 0;
605 
606  // For non-CS profile, If a direct call/invoke instruction is inlined in
607  // profile (findCalleeFunctionSamples returns non-empty result), but not
608  // inlined here, it means that the inlined callsite has no sample, thus the
609  // call instruction should have 0 count.
610  // For CS profile, the callsite count of previously inlined callees is
611  // populated with the entry count of the callees.
613  if (const auto *CB = dyn_cast<CallBase>(&Inst))
614  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
615  return 0;
616 
  // Look the probe id up in the profile; the second argument is always 0 here.
617  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
618  if (R) {
  // Scale the raw count by the probe's factor. Factor is printed with
  // "%0.2f" below, so it is presumably floating-point and the product is
  // truncated when stored into the 64-bit integer -- confirm.
619  uint64_t Samples = R.get() * Probe->Factor;
  // Emit the "AppliedSamples" remark only when markSamplesUsed reports that
  // this is the first time the record is marked.
620  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
621  if (FirstMark) {
622  ORE->emit([&]() {
623  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
624  Remark << "Applied " << ore::NV("NumSamples", Samples);
625  Remark << " samples from profile (ProbeId=";
626  Remark << ore::NV("ProbeId", Probe->Id);
627  Remark << ", Factor=";
628  Remark << ore::NV("Factor", Probe->Factor);
629  Remark << ", OriginalSamples=";
630  Remark << ore::NV("OriginalSamples", R.get());
631  Remark << ")";
632  return Remark;
633  });
634  }
  // The debug line prints the unscaled count (R.get()), not Samples.
635  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
636  << " - weight: " << R.get() << " - factor: "
637  << format("%0.2f", Probe->Factor) << ")\n");
638  return Samples;
639  }
  // No record for this probe id: propagate the failed lookup.
640  return R;
641 }
642 
643 /// Get the FunctionSamples for a call instruction.
644 ///
645 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
646 /// instance in which that call instruction is calling to. It contains
647 /// all samples that resides in the inlined instance. We first find the
648 /// inlined instance in which the call instruction is from, then we
649 /// traverse its children to find the callsite with the matching
650 /// location.
651 ///
652 /// \param Inst Call/Invoke instruction to query.
653 ///
654 /// \returns The FunctionSamples pointer to the inlined instance.
655 const FunctionSamples *
656 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
  // A call without a debug location cannot be matched to a callsite.
657  const DILocation *DIL = Inst.getDebugLoc();
658  if (!DIL) {
659  return nullptr;
660  }
661 
  // CalleeName stays empty when getCalledFunction() returns null.
662  StringRef CalleeName;
663  if (Function *Callee = Inst.getCalledFunction())
664  CalleeName = Callee->getName();
665 
  // NOTE(review): the condition guarding this context-tracker lookup is not
  // visible in this listing; presumably it applies to context-sensitive
  // profiles only -- confirm against the full source.
667  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
668 
  // Otherwise: find the samples that contain this call, then look up the
  // nested samples at the call's callsite identifier, passing the callee name
  // and the reader's remapper.
669  const FunctionSamples *FS = findFunctionSamples(Inst);
670  if (FS == nullptr)
671  return nullptr;
672 
673  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
674  CalleeName, Reader->getRemapper());
675 }
676 
677 /// Returns a vector of FunctionSamples that are the indirect call targets
678 /// of \p Inst. The vector is sorted by the total number of samples. Stores
679 /// the total call count of the indirect call in \p Sum.
// Implementation notes:
//   - The result is ordered by FSCompare: descending getHeadSamplesEstimate(),
//     ties broken by ascending GUID of the function name.
//   - \p Sum is left untouched on the early returns taken before it is reset
//     to 0 (no debug location; no FunctionSamples for Inst).
// NOTE(review): the condition opening the context-tracker branch and the
// declaration of CallSite are not visible in this listing.
680 std::vector<const FunctionSamples *>
681 SampleProfileLoader::findIndirectCallFunctionSamples(
682  const Instruction &Inst, uint64_t &Sum) const {
683  const DILocation *DIL = Inst.getDebugLoc();
684  std::vector<const FunctionSamples *> R;
685 
686  if (!DIL) {
687  return R;
688  }
689 
  // Ordering used for the result. Inside the lambda, the parameter R shadows
  // the result vector R declared above.
690  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
691  assert(L && R && "Expect non-null FunctionSamples");
692  if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
693  return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
694  return FunctionSamples::getGUID(L->getName()) <
695  FunctionSamples::getGUID(R->getName());
696  };
697 
699  auto CalleeSamples =
700  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
701  if (CalleeSamples.empty())
702  return R;
703 
704  // For CSSPGO, we only use target context profile's entry count
705  // as that already includes both inlined callee and non-inlined ones.
706  Sum = 0;
707  for (const auto *const FS : CalleeSamples) {
708  Sum += FS->getHeadSamplesEstimate();
709  R.push_back(FS);
710  }
711  llvm::sort(R, FSCompare);
712  return R;
713  }
714 
715  const FunctionSamples *FS = findFunctionSamples(Inst);
716  if (FS == nullptr)
717  return R;
718 
  // Sum = counts of the recorded call targets at this callsite ...
720  auto T = FS->findCallTargetMapAt(CallSite);
721  Sum = 0;
722  if (T)
723  for (const auto &T_C : T.get())
724  Sum += T_C.second;
  // ... plus the head-sample estimate of every nested profile at the same
  // callsite; those nested profiles form the returned vector.
725  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
726  if (M->empty())
727  return R;
728  for (const auto &NameFS : *M) {
729  Sum += NameFS.second.getHeadSamplesEstimate();
730  R.push_back(&NameFS.second);
731  }
732  llvm::sort(R, FSCompare);
733  }
734  return R;
735 }
736 
// Returns the FunctionSamples that \p Inst belongs to.
//
// Without a debug location the member Samples is returned. Otherwise the
// result is memoized per DILocation in DILocation2SampleMap: the lookup runs
// only the first time a location is seen, and a null result is cached too.
// NOTE(review): the condition opening the probe check and the condition that
// selects between the context-tracker lookup and Samples->findFunctionSamples
// are not visible in this listing.
737 const FunctionSamples *
738 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
740  Optional<PseudoProbe> Probe = extractProbe(Inst);
741  if (!Probe)
742  return nullptr;
743  }
744 
745  const DILocation *DIL = Inst.getDebugLoc();
746  if (!DIL)
747  return Samples;
748 
  // try_emplace inserts a nullptr placeholder only if DIL is new; it.second
  // is true exactly in that case.
749  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
750  if (it.second) {
752  it.first->second = ContextTracker->getContextSamplesFor(DIL);
753  else
754  it.first->second =
755  Samples->findFunctionSamples(DIL, Reader->getRemapper());
756  }
757  return it.first->second;
758 }
759 
760 /// Check whether the indirect call promotion history of \p Inst allows
761 /// the promotion for \p Candidate.
762 /// If the profile count for the promotion candidate \p Candidate is
763 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
764 /// for \p Inst. If we already have at least MaxNumPromotions
765 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
766 /// cannot promote for \p Inst anymore.
767 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
768  uint32_t NumVals = 0;
769  uint64_t TotalCount = 0;
770  std::unique_ptr<InstrProfValueData[]> ValueData =
771  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
772  bool Valid =
773  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
774  ValueData.get(), NumVals, TotalCount, true);
775  // No valid value profile so no promoted targets have been recorded
776  // before. Ok to do ICP.
777  if (!Valid)
778  return true;
779 
780  unsigned NumPromoted = 0;
781  for (uint32_t I = 0; I < NumVals; I++) {
782  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
783  continue;
784 
785  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
786  // metadata, it means the candidate has been promoted for this
787  // indirect call.
788  if (ValueData[I].Value == Function::getGUID(Candidate))
789  return false;
790  NumPromoted++;
791  // If already have MaxNumPromotions promotion, don't do it anymore.
792  if (NumPromoted == MaxNumPromotions)
793  return false;
794  }
795  return true;
796 }
797 
798 /// Update indirect call target profile metadata for \p Inst.
799 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
800 /// If it is 0, it means updateIDTMetaData is used to mark a
801 /// certain target to be promoted already. If it is not zero,
802 /// we expect to use it to update the total count in the value profile.
// Rewrites the indirect-call-target value profile attached to an instruction.
//
// Two modes, selected by \p Sum:
//   - Sum == 0: mark the single target in \p CallTargets as promoted. Existing
//     entries are kept, the target's count becomes NOMORE_ICP_MAGICNUM, and
//     the total written back is the old total minus that target's old count.
//   - Sum != 0: keep only the existing promoted entries, add the targets from
//     \p CallTargets that are not already promoted, and subtract the counts of
//     the already-promoted ones from Sum.
// The merged entries are sorted (count descending, then value descending) and
// at most MaxNumPromotions of them are written with annotateValueSite.
// NOTE(review): the line carrying the function name and its first parameter is
// not visible in this listing; the body refers to that parameter as Inst.
803 static void
805  const SmallVectorImpl<InstrProfValueData> &CallTargets,
806  uint64_t Sum) {
807  // Bail out early if MaxNumPromotions is zero.
808  // This prevents allocating an array of zero length below.
809  //
810  // Note `updateIDTMetaData` is called in two places so check
811  // `MaxNumPromotions` inside it.
812  if (MaxNumPromotions == 0)
813  return;
814  uint32_t NumVals = 0;
815  // OldSum is the existing total count in the value profile data.
816  uint64_t OldSum = 0;
817  std::unique_ptr<InstrProfValueData[]> ValueData =
818  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
  // Read up to MaxNumPromotions existing entries; Valid tells whether the
  // instruction carried a usable value profile.
819  bool Valid =
820  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
821  ValueData.get(), NumVals, OldSum, true);
822 
  // Target value -> count, merged from the existing profile and CallTargets.
823  DenseMap<uint64_t, uint64_t> ValueCountMap;
824  if (Sum == 0) {
825  assert((CallTargets.size() == 1 &&
826  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
827  "If sum is 0, assume only one element in CallTargets "
828  "with count being NOMORE_ICP_MAGICNUM");
829  // Initialize ValueCountMap with existing value profile data.
830  if (Valid) {
831  for (uint32_t I = 0; I < NumVals; I++)
832  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
833  }
  // try_emplace leaves an existing entry untouched; Pair.second is false in
  // that case.
834  auto Pair =
835  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
836  // If the target already exists in value profile, decrease the total
837  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
838  if (!Pair.second) {
839  OldSum -= Pair.first->second;
840  Pair.first->second = NOMORE_ICP_MAGICNUM;
841  }
842  Sum = OldSum;
843  } else {
844  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
845  // counts in the value profile.
846  if (Valid) {
847  for (uint32_t I = 0; I < NumVals; I++) {
848  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
849  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
850  }
851  }
852 
853  for (const auto &Data : CallTargets) {
854  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
  // Newly inserted target: its count already contributes to Sum.
855  if (Pair.second)
856  continue;
857  // The target represented by Data.Value has already been promoted.
858  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
859  // Sum by Data.Count.
860  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
861  Sum -= Data.Count;
862  }
863  }
864 
  // Flatten the map into a vector so it can be sorted.
865  SmallVector<InstrProfValueData, 8> NewCallTargets;
866  for (const auto &ValueCount : ValueCountMap) {
867  NewCallTargets.emplace_back(
868  InstrProfValueData{ValueCount.first, ValueCount.second});
869  }
870 
  // Sort by count, largest first; equal counts are ordered by value, largest
  // first, so the result does not depend on map iteration order.
871  llvm::sort(NewCallTargets,
872  [](const InstrProfValueData &L, const InstrProfValueData &R) {
873  if (L.Count != R.Count)
874  return L.Count > R.Count;
875  return L.Value > R.Value;
876  });
877 
  // Write back no more than MaxNumPromotions entries.
878  uint32_t MaxMDCount =
879  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
  // Three getParent() hops from the instruction; presumably block -> function
  // -> module -- confirm.
880  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
881  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
882 }
883 
884 /// Attempt to promote indirect call and also inline the promoted call.
885 ///
886 /// \param F Caller function.
887 /// \param Candidate ICP and inline candidate.
888 /// \param SumOrigin Original sum of target counts for indirect call before
889 /// promoting given candidate.
890 /// \param Sum Prorated sum of remaining target counts for indirect call
891 /// after promoting given candidate.
892 /// \param InlinedCallSite Output vector for new call sites exposed after
893 /// inlining.
/// \returns True only if the call was promoted and the promoted direct call
/// was then inlined; false on every other path.
894 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
895  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
896  SmallVector<CallBase *, 8> *InlinedCallSite) {
897  // Bail out early if sample-loader inliner is disabled.
899  return false;
900 
901  // Bail out early if MaxNumPromotions is zero.
902  // This prevents allocating an array of zero length in callees below.
903  if (MaxNumPromotions == 0)
904  return false;
     // Resolve the profiled callee name to a function; give up if it is not
     // in SymbolMap or maps to a null entry.
905  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
906  auto R = SymbolMap.find(CalleeFunctionName);
907  if (R == SymbolMap.end() || !R->getValue())
908  return false;
909 
910  auto &CI = *Candidate.CallInstr;
911  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
912  return false;
913 
914  const char *Reason = "Callee function not available";
915  // R->getValue() != &F is to prevent promoting a recursive call.
916  // If it is a recursive call, we do not inline it as it could bloat
917  // the code exponentially. There are better ways to handle this, e.g.
918  // clone the caller first, and inline the cloned caller if it is
919  // recursive. As llvm does not inline recursive calls, we will
920  // simply ignore it instead of handling it explicitly.
921  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
922  R->getValue()->hasFnAttribute("use-sample-profile") &&
923  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
924  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
925  // in the value profile metadata so the target won't be promoted again.
926  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
927  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
928  updateIDTMetaData(CI, SortedCallTargets, 0);
929 
     // NOTE(review): DI is the address of the call's result, so the null
     // check below looks like it can never fail; confirm.
930  auto *DI = &pgo::promoteIndirectCall(
931  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
932  if (DI) {
933  Sum -= Candidate.CallsiteCount;
934  // Do not prorate the indirect callsite distribution since the original
935  // distribution will be used to scale down non-promoted profile target
936  // counts later. By doing this we lose track of the real callsite count
937  // for the leftover indirect callsite as a trade off for accurate call
938  // target counts.
939  // TODO: Ideally we would have two separate factors, one for call site
940  // counts and one to prorate call target counts.
941  // Do not update the promoted direct callsite distribution at this
942  // point since the original distribution combined with the callee profile
943  // will be used to prorate callsites from the callee if inlined. Once not
944  // inlined, the direct callsite distribution should be prorated so that
945  // it will reflect the real callsite counts.
946  Candidate.CallInstr = DI;
947  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
948  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
949  if (!Inlined) {
950  // Prorate the direct callsite distribution so that it reflects real
951  // callsite counts.
953  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
954  }
955  return Inlined;
956  }
957  }
958  } else {
959  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
960  << Candidate.CalleeSamples->getFuncName() << " because "
961  << Reason << "\n");
962  }
963  return false;
964 }
965 
/// Decide whether the callsite \p CallInst may be inlined on size grounds.
///
/// Always false when ProfileSizeInline is off or when Callee is null.
/// Otherwise the inline cost is computed: "never" yields false, "always"
/// yields true, and any other cost is accepted only when it does not exceed
/// SampleColdCallSiteThreshold.
966 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
967  if (!ProfileSizeInline)
968  return false;
969 
971  if (Callee == nullptr)
972  return false;
973 
974  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
975  GetAC, GetTLI);
976 
977  if (Cost.isNever())
978  return false;
979 
980  if (Cost.isAlways())
981  return true;
982 
     // Variable cost: compare against the cold-callsite threshold.
983  return Cost.getCost() <= SampleColdCallSiteThreshold;
984 }
985 
986 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
987  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
988  bool Hot) {
989  for (auto *I : Candidates) {
990  Function *CalledFunction = I->getCalledFunction();
991  if (CalledFunction) {
992  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
993  "InlineAttempt", I->getDebugLoc(),
994  I->getParent())
995  << "previous inlining reattempted for "
996  << (Hot ? "hotness: '" : "size: '")
997  << ore::NV("Callee", CalledFunction) << "' into '"
998  << ore::NV("Caller", &F) << "'");
999  }
1000  }
1001 }
1002 
/// Collect into \p InlinedGUIDs the GUIDs of inline candidates derived from
/// the profile \p Samples.
///
/// \param CB Call site being examined; may be null.
/// \param Samples Profile to walk. May be null only when the external inline
/// advisor wants \p CB inlined, in which case just the direct callee's GUID
/// is recorded.
/// \param InlinedGUIDs Output set of GUIDs.
/// \param SymbolMap Function-name to Function lookup for this module.
/// \param Threshold Minimum sample count for a callee or call target to be
/// recorded; forced to 0 when the external advisor wants \p CB inlined.
1003 void SampleProfileLoader::findExternalInlineCandidate(
1004  CallBase *CB, const FunctionSamples *Samples,
1005  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1006  const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
1007 
1008  // If ExternalInlineAdvisor wants to inline an external function
1009  // make sure it's imported
1010  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1011  // Samples may not exist for replayed function, if so
1012  // just add the direct GUID and move on
1013  if (!Samples) {
1014  InlinedGUIDs.insert(
1016  return;
1017  }
1018  // Otherwise, drop the threshold to import everything that we can
1019  Threshold = 0;
1020  }
1021 
1022  assert(Samples && "expect non-null caller profile");
1023 
1024  // For AutoFDO profile, retrieve candidate profiles by walking over
1025  // the nested inlinee profiles.
1027  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1028  return;
1029  }
1030 
      // Breadth-first walk over the context trie, starting at the node that
      // holds Samples.
1031  ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1032  std::queue<ContextTrieNode *> CalleeList;
1033  CalleeList.push(Caller);
1034  while (!CalleeList.empty()) {
1035  ContextTrieNode *Node = CalleeList.front();
1036  CalleeList.pop();
1037  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1038  // For CSSPGO profile, retrieve candidate profile by walking over the
1039  // trie built for context profile. Note that call targets are also taken
1040  // even if callee doesn't have a corresponding context profile.
1041  if (!CalleeSample)
1042  continue;
1043 
1044  // If pre-inliner decision is used, honor that for importing as well.
1045  bool PreInline =
1048  if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1049  continue;
1050 
1051  StringRef Name = CalleeSample->getFuncName();
1053  // Add to the import list only when it's defined out of module.
1054  if (!Func || Func->isDeclaration())
1055  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1056 
1057  // Import hot CallTargets, which may not be available in IR because full
1058  // profile annotation cannot be done until backend compilation in ThinLTO.
1059  for (const auto &BS : CalleeSample->getBodySamples())
1060  for (const auto &TS : BS.second.getCallTargets())
1061  if (TS.getValue() > Threshold) {
1062  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1063  const Function *Callee = SymbolMap.lookup(CalleeName);
1064  if (!Callee || Callee->isDeclaration())
1065  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1066  }
1067 
1068  // Import hot child context profile associated with callees. Note that this
1069  // may have some overlap with the call target loop above, but doing this
1070  // based on child context profile again effectively allows us to use the
1071  // max of entry count and call target count to determine importing.
1072  for (auto &Child : Node->getAllChildContext()) {
1073  ContextTrieNode *CalleeNode = &Child.second;
1074  CalleeList.push(CalleeNode);
1075  }
1076  }
1077 }
1078 
1079 /// Iteratively inline hot callsites of a function.
1080 ///
1081 /// Iteratively traverse all callsites of the function \p F, so as to
1082 /// find out callsites with corresponding inline instances.
1083 ///
1084 /// For such callsites,
1085 /// - If it is hot enough, inline the callsite and add the callsites of the
1086 /// callee into the caller. If the call is an indirect call, first promote
1087 /// it to direct call. Each indirect call is limited with a single target.
1088 ///
1089 /// - If a callsite is not inlined, merge its profile to the outline
1090 /// version (if --sample-profile-merge-inlinee is true), or scale the
1091 /// counters of standalone function based on the profile of inlined
1092 /// instances (if --sample-profile-merge-inlinee is false).
1093 ///
1094 /// Later passes may consume the updated profiles.
1095 ///
1096 /// \param F function to perform iterative inlining.
1097 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1098 /// inlined in the profiled binary.
1099 ///
1100 /// \returns True if any inlining happened.
1101 bool SampleProfileLoader::inlineHotFunctions(
1102  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1103  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1104  // Profile symbol list is ignored when profile-sample-accurate is on.
1105  assert((!ProfAccForSymsInList ||
1107  !F.hasFnAttribute("profile-sample-accurate"))) &&
1108  "ProfAccForSymsInList should be false when profile-sample-accurate "
1109  "is enabled");
1110 
      // Call sites that had a callee profile but have not been inlined so
      // far; entries are erased again once the call site is inlined.
1111  MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1112  bool Changed = false;
1113  bool LocalChanged = true;
      // Repeat until a full sweep over F inlines nothing.
1114  while (LocalChanged) {
1115  LocalChanged = false;
1117  for (auto &BB : F) {
1118  bool Hot = false;
1119  SmallVector<CallBase *, 10> AllCandidates;
1120  SmallVector<CallBase *, 10> ColdCandidates;
1121  for (auto &I : BB.getInstList()) {
1122  const FunctionSamples *FS = nullptr;
1123  if (auto *CB = dyn_cast<CallBase>(&I)) {
1124  if (!isa<IntrinsicInst>(I)) {
1125  if ((FS = findCalleeFunctionSamples(*CB))) {
1126  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1127  "GUIDToFuncNameMap has to be populated");
1128  AllCandidates.push_back(CB);
1129  if (FS->getHeadSamplesEstimate() > 0 ||
1131  LocalNotInlinedCallSites.insert({CB, FS});
1132  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1133  Hot = true;
1134  else if (shouldInlineColdCallee(*CB))
1135  ColdCandidates.push_back(CB);
1136  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1137  AllCandidates.push_back(CB);
1138  }
1139  }
1140  }
1141  }
      // If any callsite in this block is hot (or an external advisor is in
      // use), queue every candidate of the block; otherwise queue only the
      // cold candidates accepted by shouldInlineColdCallee.
1142  if (Hot || ExternalInlineAdvisor) {
1143  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1144  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1145  } else {
1146  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1147  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1148  }
1149  }
1150  for (CallBase *I : CIS) {
1151  Function *CalledFunction = I->getCalledFunction();
1152  InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1153  0 /* dummy count */,
1154  1.0 /* dummy distribution factor */};
1155  // Do not inline recursive calls.
1156  if (CalledFunction == &F)
1157  continue;
1158  if (I->isIndirectCall()) {
      // NOTE(review): Sum is not initialized here; presumably
      // findIndirectCallFunctionSamples always writes it. Confirm.
1159  uint64_t Sum;
1160  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1161  uint64_t SumOrigin = Sum;
1162  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1163  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1164  PSI->getOrCompHotCountThreshold());
1165  continue;
1166  }
1167  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1168  continue;
1169 
1170  Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1171  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1172  LocalNotInlinedCallSites.erase(I);
1173  LocalChanged = true;
1174  }
1175  }
1176  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1177  !CalledFunction->isDeclaration()) {
1178  if (tryInlineCandidate(Candidate)) {
1179  LocalNotInlinedCallSites.erase(I);
1180  LocalChanged = true;
1181  }
1182  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1183  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1184  InlinedGUIDs, SymbolMap,
1185  PSI->getOrCompHotCountThreshold());
1186  }
1187  }
1188  Changed |= LocalChanged;
1189  }
1190 
1191  // For CS profile, profile for not inlined context will be merged when
1192  // base profile is being retrieved.
1194  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1195  return Changed;
1196 }
1197 
/// Try to inline the direct call held by \p Candidate.
///
/// \param Candidate Inline candidate; its call instruction must have a
/// statically known callee (asserted).
/// \param InlinedCallSites Optional output. When non-null and inlining
/// succeeds, it is cleared and refilled with the call sites reported in
/// IFI.InlinedCallSites.
/// \returns True if the call was inlined.
1198 bool SampleProfileLoader::tryInlineCandidate(
1199  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1200  // Do not attempt to inline a candidate if
1201  // --disable-sample-loader-inlining is true.
1203  return false;
1204 
      // Capture everything needed for remarks up front; CB must not be
      // touched after a successful InlineFunction.
1205  CallBase &CB = *Candidate.CallInstr;
1206  Function *CalledFunction = CB.getCalledFunction();
1207  assert(CalledFunction && "Expect a callee with definition");
1208  DebugLoc DLoc = CB.getDebugLoc();
1209  BasicBlock *BB = CB.getParent();
1210 
1211  InlineCost Cost = shouldInlineCandidate(Candidate);
1212  if (Cost.isNever()) {
1213  ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1214  "InlineFail", DLoc, BB)
1215  << "incompatible inlining");
1216  return false;
1217  }
1218 
      // A cost that converts to false is rejected without emitting a remark.
1219  if (!Cost)
1220  return false;
1221 
1222  InlineFunctionInfo IFI(nullptr, GetAC);
1223  IFI.UpdateProfile = false;
1224  InlineResult IR = InlineFunction(CB, IFI,
1225  /*MergeAttributes=*/true);
1226  if (!IR.isSuccess())
1227  return false;
1228 
1229  // The call to InlineFunction erases I, so we can't pass it here.
1230  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1231  Cost, true, getAnnotatedRemarkPassName());
1232 
1233  // Now populate the list of newly exposed call sites.
1234  if (InlinedCallSites) {
1235  InlinedCallSites->clear();
1236  for (auto &I : IFI.InlinedCallSites)
1237  InlinedCallSites->push_back(I);
1238  }
1239 
1241  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1242  ++NumCSInlined;
1243 
1244  // Prorate inlined probes for a duplicated inlining callsite which probably
1245  // has a distribution less than 100%. Samples for an inlinee should be
1246  // distributed among the copies of the original callsite based on each
1247  // callsite's distribution factor for counts accuracy. Note that an inlined
1248  // probe may come with its own distribution factor if it has been duplicated
1249  // in the inlinee body. The two factors are multiplied to reflect the
1250  // aggregation of duplication.
1251  if (Candidate.CallsiteDistribution < 1) {
1252  for (auto &I : IFI.InlinedCallSites) {
1253  if (Optional<PseudoProbe> Probe = extractProbe(*I))
1255  Candidate.CallsiteDistribution);
1256  }
1257  NumDuplicatedInlinesite++;
1258  }
1259 
1260  return true;
1261 }
1262 
1263 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1264  CallBase *CB) {
1265  assert(CB && "Expect non-null call instruction");
1266 
1267  if (isa<IntrinsicInst>(CB))
1268  return false;
1269 
1270  // Find the callee's profile. For indirect call, find hottest target profile.
1271  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1272  // If ExternalInlineAdvisor wants to inline this site, do so even
1273  // if Samples are not present.
1274  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1275  return false;
1276 
1277  float Factor = 1.0;
1278  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1279  Factor = Probe->Factor;
1280 
1281  uint64_t CallsiteCount =
1282  CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1283  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1284  return true;
1285 }
1286 
1288 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
      // Translate the external advisor's advice for CB into an InlineCost:
      //  - advice present and inlining not recommended -> "never"
      //  - advice present and inlining recommended     -> "always"
      //  - no advisor, or the advisor gave no advice    -> empty result
      // The advice object is told which outcome was taken before returning.
1289  std::unique_ptr<InlineAdvice> Advice = nullptr;
1290  if (ExternalInlineAdvisor) {
1291  Advice = ExternalInlineAdvisor->getAdvice(CB);
1292  if (Advice) {
1293  if (!Advice->isInliningRecommended()) {
1294  Advice->recordUnattemptedInlining();
1295  return InlineCost::getNever("not previously inlined");
1296  }
1297  Advice->recordInlining();
1298  return InlineCost::getAlways("previously inlined");
1299  }
1300  }
1301 
1302  return {};
1303 }
1304 
1305 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1306  Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1307  return Cost ? !!Cost.value() : false;
1308 }
1309 
/// Compute the inline cost used to decide whether \p Candidate is inlined.
///
/// A cost supplied by the external inline advisor takes precedence. Otherwise
/// the call analyzer's cost is computed; its "never"/"always" answers are
/// returned as is, a positive preinliner decision yields "always", and the
/// remaining cases return the analyzer's cost paired with a sample-PGO
/// threshold.
1310 InlineCost
1311 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1312  if (Optional<InlineCost> ReplayCost =
1313  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1314  return ReplayCost.value();
1315  // Adjust threshold based on call site hotness, only do this for callsite
1316  // prioritized inliner because otherwise cost-benefit check is done earlier.
1317  int SampleThreshold = SampleColdCallSiteThreshold;
1319  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1320  SampleThreshold = SampleHotCallSiteThreshold;
1321  else if (!ProfileSizeInline)
1322  return InlineCost::getNever("cold callsite");
1323  }
1324 
1325  Function *Callee = Candidate.CallInstr->getCalledFunction();
1326  assert(Callee && "Expect a definition for inline candidate of direct call");
1327 
1328  InlineParams Params = getInlineParams();
1329  // We will ignore the threshold from inline cost, so always get full cost.
1330  Params.ComputeFullInlineCost = true;
1332  // Checks if there is anything in the reachable portion of the callee at
1333  // this callsite that makes this inlining potentially illegal. Need to
1334  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1335  // when cost exceeds threshold without checking all IRs in the callee.
1336  // The actual cost does not matter because we only check isNever() to
1337  // see if it is legal to inline the callsite.
1338  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1339  GetTTI(*Callee), GetAC, GetTLI);
1340 
1341  // Honor always inline and never inline from call analyzer
1342  if (Cost.isNever() || Cost.isAlways())
1343  return Cost;
1344 
1345  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1346  // decisions based on hotness as well as accurate function byte sizes for
1347  // given context using function/inlinee sizes from previous build. It
1348  // stores the decision in profile, and also adjust/merge context profile
1349  // aiming at better context-sensitive post-inline profile quality, assuming
1350  // all inline decision estimates are going to be honored by compiler. Here
1351  // we replay that inline decision under `sample-profile-use-preinliner`.
1352  // Note that we don't need to handle negative decision from preinliner as
1353  // context profile for not inlined calls are merged by preinliner already.
1354  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1355  // Once two nodes are merged due to promotion, we're losing some context
1356  // so the original context-sensitive preinliner decision should be ignored
1357  // for SyntheticContext.
1358  SampleContext &Context = Candidate.CalleeSamples->getContext();
1359  if (!Context.hasState(SyntheticContext) &&
1360  Context.hasAttribute(ContextShouldBeInlined))
1361  return InlineCost::getAlways("preinliner");
1362  }
1363 
1364  // For old FDO inliner, we inline the call site as long as cost is not
1365  // "Never". The cost-benefit check is done earlier.
1367  return InlineCost::get(Cost.getCost(), INT_MAX);
1368  }
1369 
1370  // Otherwise only use the cost from call analyzer, but overwrite threshold
1371  // with Sample PGO threshold.
1372  return InlineCost::get(Cost.getCost(), SampleThreshold);
1373 }
1374 
/// Inline call sites of \p F in the order given by a candidate queue.
///
/// All call sites of \p F that getInlineCandidate accepts are pushed onto
/// CQueue. Candidates are then taken from the top of the queue until it is
/// empty or F.getInstructionCount() reaches SizeLimit. Call sites exposed by
/// a successful inline are pushed back as new candidates.
///
/// \param F Function whose call sites are processed.
/// \param InlinedGUIDs Set passed to findExternalInlineCandidate in the
/// ThinLTO pre-link phase.
/// \returns True if at least one call site was inlined.
1375 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1376  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1377  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1378  // Profile symbol list is ignored when profile-sample-accurate is on.
1379  assert((!ProfAccForSymsInList ||
1381  !F.hasFnAttribute("profile-sample-accurate"))) &&
1382  "ProfAccForSymsInList should be false when profile-sample-accurate "
1383  "is enabled");
1384 
1385  // Populating worklist with initial call sites from root inliner, along
1386  // with call site weights.
1387  CandidateQueue CQueue;
1388  InlineCandidate NewCandidate;
1389  for (auto &BB : F) {
1390  for (auto &I : BB.getInstList()) {
1391  auto *CB = dyn_cast<CallBase>(&I);
1392  if (!CB)
1393  continue;
1394  if (getInlineCandidate(&NewCandidate, CB))
1395  CQueue.push(NewCandidate);
1396  }
1397  }
1398 
1399  // Cap the size growth from profile guided inlining. This is needed even
1400  // though cost of each inline candidate already accounts for callee size,
1401  // because with top-down inlining, we can grow inliner size significantly
1402  // with large number of smaller inlinees each pass the cost check.
1404  "Max inline size limit should not be smaller than min inline size "
1405  "limit.");
1406  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1409  if (ExternalInlineAdvisor)
1411 
1412  MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1413 
1414  // Perform iterative BFS call site prioritized inlining
1415  bool Changed = false;
1416  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1417  InlineCandidate Candidate = CQueue.top();
1418  CQueue.pop();
1419  CallBase *I = Candidate.CallInstr;
1420  Function *CalledFunction = I->getCalledFunction();
1421 
      // Never inline a direct recursive call.
1422  if (CalledFunction == &F)
1423  continue;
1424  if (I->isIndirectCall()) {
1425  uint64_t Sum = 0;
1426  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
      // SumOrigin keeps the unscaled total; Sum is prorated by the call
      // site's distribution factor.
1427  uint64_t SumOrigin = Sum;
1428  Sum *= Candidate.CallsiteDistribution;
1429  unsigned ICPCount = 0;
1430  for (const auto *FS : CalleeSamples) {
1431  // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
1432  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1433  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1434  PSI->getOrCompHotCountThreshold());
1435  continue;
1436  }
1437  uint64_t EntryCountDistributed =
1438  FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1439  // In addition to regular inline cost check, we also need to make sure
1440  // ICP isn't introducing excessive speculative checks even if individual
1441  // target looks beneficial to promote and inline. That means we should
1442  // only do ICP when there's a small number of dominant targets.
1443  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1444  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1445  break;
1446  // TODO: Fix CallAnalyzer to handle all indirect calls.
1447  // For indirect call, we don't run CallAnalyzer to get InlineCost
1448  // before actual inlining. This is because we could see two different
1449  // types from the same definition, which makes CallAnalyzer choke as
1450  // it's expecting matching parameter type on both caller and callee
1451  // side. See example from PR18962 for the triggering cases (the bug was
1452  // fixed, but we generate different types).
1453  if (!PSI->isHotCount(EntryCountDistributed))
1454  break;
1455  SmallVector<CallBase *, 8> InlinedCallSites;
1456  // Attach function profile for promoted indirect callee, and update
1457  // call site count for the promoted inline candidate too.
1458  Candidate = {I, FS, EntryCountDistributed,
1459  Candidate.CallsiteDistribution};
1460  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1461  &InlinedCallSites)) {
1462  for (auto *CB : InlinedCallSites) {
1463  if (getInlineCandidate(&NewCandidate, CB))
1464  CQueue.emplace(NewCandidate);
1465  }
1466  ICPCount++;
1467  Changed = true;
1468  } else if (!ContextTracker) {
1469  LocalNotInlinedCallSites.insert({I, FS});
1470  }
1471  }
1472  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1473  !CalledFunction->isDeclaration()) {
1474  SmallVector<CallBase *, 8> InlinedCallSites;
1475  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1476  for (auto *CB : InlinedCallSites) {
1477  if (getInlineCandidate(&NewCandidate, CB))
1478  CQueue.emplace(NewCandidate);
1479  }
1480  Changed = true;
1481  } else if (!ContextTracker) {
1482  LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1483  }
1484  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1485  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1486  InlinedGUIDs, SymbolMap,
1487  PSI->getOrCompHotCountThreshold());
1488  }
1489  }
1490 
      // Candidates left in the queue mean the size limit stopped the loop;
      // bump the statistic that matches the limit that was in effect.
1491  if (!CQueue.empty()) {
1492  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1493  ++NumCSInlinedHitMaxLimit;
1494  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1495  ++NumCSInlinedHitMinLimit;
1496  else
1497  ++NumCSInlinedHitGrowthLimit;
1498  }
1499 
1500  // For CS profile, profile for not inlined context will be merged when
1501  // base profile is being retrieved.
1503  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1504  return Changed;
1505 }
1506 
/// Account for profiled call sites in \p F that were not inlined.
///
/// For every entry of NonInlinedCallSites whose callee is defined in this
/// module, a "NotInline" remark is emitted. The callee profile is then
/// either merged into the callee's outlined profile (ProfileMergeInlinee),
/// or its head-sample estimate is added to notInlinedCallInfo for that
/// callee. Profiles with zero samples, and contexts already duplicated into
/// the base profile, are skipped.
1507 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1509  const Function &F) {
1510  // Accumulate not inlined callsite information into notInlinedSamples
1511  for (const auto &Pair : NonInlinedCallSites) {
1512  CallBase *I = Pair.first;
1513  Function *Callee = I->getCalledFunction();
1514  if (!Callee || Callee->isDeclaration())
1515  continue;
1516 
1517  ORE->emit(
1518  OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1519  I->getDebugLoc(), I->getParent())
1520  << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1521  << "' into '" << ore::NV("Caller", &F) << "'");
1522 
1523  ++NumCSNotInlined;
      // NOTE(review): FS is dereferenced without a null check; presumably
      // every entry of NonInlinedCallSites carries a profile. Confirm.
1524  const FunctionSamples *FS = Pair.second;
1525  if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1526  continue;
1527  }
1528 
1529  // Do not merge a context that is already duplicated into the base profile.
1530  if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1531  continue;
1532 
1533  if (ProfileMergeInlinee) {
1534  // A function call can be replicated by optimizations like callsite
1535  // splitting or jump threading and the replicates end up sharing the
1536  // sample nested callee profile instead of slicing the original
1537  // inlinee's profile. We want to do merge exactly once by filtering out
1538  // callee profiles with a non-zero head sample count.
1539  if (FS->getHeadSamples() == 0) {
1540  // Use entry samples as head samples during the merge, as inlinees
1541  // don't have head samples.
1542  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1543  FS->getHeadSamplesEstimate());
1544 
1545  // Note that we have to do the merge right after processing function.
1546  // This allows OutlineFS's profile to be used for annotation during
1547  // top-down processing of functions' annotation.
1548  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1549  OutlineFS->merge(*FS, 1);
1550  // Set outlined profile to be synthetic to not bias the inliner.
1551  OutlineFS->SetContextSynthetic();
1552  }
1553  } else {
      // Not merging: just accumulate the entry count per callee, creating a
      // zero-initialized record on first sight of the callee.
1554  auto pair =
1555  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1556  pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1557  }
1558  }
1559 }
1560 
1561 /// Returns the sorted CallTargetMap \p M by count in descending order.
      // Each (target name, count) pair from the sorted call targets becomes
      // an InstrProfValueData keyed by the GUID computed from the name.
1565  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1566  R.emplace_back(
1567  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1568  }
1569  return R;
1570 }
1571 
1572 // Generate MD_prof metadata for every branch instruction using the
1573 // edge weights computed during propagation.
1574 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1575  // Generate MD_prof metadata for every branch instruction using the
1576  // edge weights computed during propagation.
1577  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1578  LLVMContext &Ctx = F.getContext();
1579  MDBuilder MDB(Ctx);
1580  for (auto &BI : F) {
1581  BasicBlock *BB = &BI;
1582 
1583  if (BlockWeights[BB]) {
1584  for (auto &I : BB->getInstList()) {
1585  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1586  continue;
1587  if (!cast<CallBase>(I).getCalledFunction()) {
1588  const DebugLoc &DLoc = I.getDebugLoc();
1589  if (!DLoc)
1590  continue;
1591  const DILocation *DIL = DLoc;
1592  const FunctionSamples *FS = findFunctionSamples(I);
1593  if (!FS)
1594  continue;
1596  auto T = FS->findCallTargetMapAt(CallSite);
1597  if (!T || T.get().empty())
1598  continue;
1600  // Prorate the callsite counts based on the pre-ICP distribution
1601  // factor to reflect what is already done to the callsite before
1602  // ICP, such as callsite cloning.
1603  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1604  if (Probe->Factor < 1)
1605  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1606  }
1607  }
1608  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1610  uint64_t Sum = 0;
1611  for (const auto &C : T.get())
1612  Sum += C.second;
1613  // With CSSPGO all indirect call targets are counted towards the
1614  // original indirect call site in the profile, including both
1615  // inlined and non-inlined targets.
1617  if (const FunctionSamplesMap *M =
1618  FS->findFunctionSamplesMapAt(CallSite)) {
1619  for (const auto &NameFS : *M)
1620  Sum += NameFS.second.getHeadSamplesEstimate();
1621  }
1622  }
1623  if (Sum)
1624  updateIDTMetaData(I, SortedCallTargets, Sum);
1625  else if (OverwriteExistingWeights)
1626  I.setMetadata(LLVMContext::MD_prof, nullptr);
1627  } else if (!isa<IntrinsicInst>(&I)) {
1628  I.setMetadata(LLVMContext::MD_prof,
1629  MDB.createBranchWeights(
1630  {static_cast<uint32_t>(BlockWeights[BB])}));
1631  }
1632  }
1634  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1635  // clear it for cold code.
1636  for (auto &I : BB->getInstList()) {
1637  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1638  if (cast<CallBase>(I).isIndirectCall())
1639  I.setMetadata(LLVMContext::MD_prof, nullptr);
1640  else
1641  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1642  }
1643  }
1644  }
1645 
1646  Instruction *TI = BB->getTerminator();
1647  if (TI->getNumSuccessors() == 1)
1648  continue;
1649  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1650  !isa<IndirectBrInst>(TI))
1651  continue;
1652 
1653  DebugLoc BranchLoc = TI->getDebugLoc();
1654  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1655  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1656  : Twine("<UNKNOWN LOCATION>"))
1657  << ".\n");
1658  SmallVector<uint32_t, 4> Weights;
1659  uint32_t MaxWeight = 0;
1660  Instruction *MaxDestInst;
1661  // Since profi treats multiple edges (multiway branches) as a single edge,
1662  // we need to distribute the computed weight among the branches. We do
1663  // this by evenly splitting the edge weight among destinations.
1664  DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1665  std::vector<uint64_t> EdgeIndex;
1666  if (SampleProfileUseProfi) {
1667  EdgeIndex.resize(TI->getNumSuccessors());
1668  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1669  const BasicBlock *Succ = TI->getSuccessor(I);
1670  EdgeIndex[I] = EdgeMultiplicity[Succ];
1671  EdgeMultiplicity[Succ]++;
1672  }
1673  }
1674  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1675  BasicBlock *Succ = TI->getSuccessor(I);
1676  Edge E = std::make_pair(BB, Succ);
1677  uint64_t Weight = EdgeWeights[E];
1678  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1679  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1680  // if needed. Sample counts in profiles are 64-bit unsigned values,
1681  // but internally branch weights are expressed as 32-bit values.
1682  if (Weight > std::numeric_limits<uint32_t>::max()) {
1683  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1685  }
1686  if (!SampleProfileUseProfi) {
1687  // Weight is added by one to avoid propagation errors introduced by
1688  // 0 weights.
1689  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1690  } else {
1691  // Profi creates proper weights that do not require "+1" adjustments but
1692  // we evenly split the weight among branches with the same destination.
1693  uint64_t W = Weight / EdgeMultiplicity[Succ];
1694  // Rounding up, if needed, so that first branches are hotter.
1695  if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1696  W++;
1697  Weights.push_back(static_cast<uint32_t>(W));
1698  }
1699  if (Weight != 0) {
1700  if (Weight > MaxWeight) {
1701  MaxWeight = Weight;
1702  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1703  }
1704  }
1705  }
1706 
1707  misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1708 
1709  uint64_t TempWeight;
1710  // Only set weights if there is at least one non-zero weight.
1711  // In any other case, let the analyzer set weights.
1712  // Do not set weights if the weights are present unless under
1713  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1714  // twice. If the first annotation already set the weights, the second pass
1715  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1716  // weight should have their existing metadata (possibly annotated by LTO
1717  // prelink) cleared.
1718  if (MaxWeight > 0 &&
1719  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1720  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1721  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1722  ORE->emit([&]() {
1723  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1724  << "most popular destination for conditional branches at "
1725  << ore::NV("CondBranchesLoc", BranchLoc);
1726  });
1727  } else {
1729  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1730  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1731  } else {
1732  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1733  }
1734  }
1735  }
1736 }
1737 
1738 /// Once all the branch weights are computed, we emit the MD_prof
1739 /// metadata on BB using the computed values for each of its branches.
1740 ///
/// In order, this function performs an early-out check (probe-profile
/// validity, or a zero function location), runs one of the two sample-profile
/// inliners, calls computeAndPropagateWeights(), emits MD_prof metadata via
/// generateMDProfMetadata() only if something changed, and finally emits
/// coverage remarks.
///
1741 /// \param F The function to query.
1742 ///
1743 /// \returns true if \p F was modified. Returns false, otherwise.
1744 bool SampleProfileLoader::emitAnnotations(Function &F) {
1745  bool Changed = false;
1746 
// NOTE(review): embedded source line 1747 is absent from this listing. The
// `} else {` at line 1756 implies an opening `if (...) {` belongs here --
// confirm the condition against the original file.
1748  if (!ProbeManager->profileIsValid(F, *Samples)) {
1749  LLVM_DEBUG(
1750  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1751  << F.getName());
// Count the mismatch and leave F unannotated.
1752  ++NumMismatchedProfile;
1753  return false;
1754  }
1755  ++NumMatchedProfile;
1756  } else {
// A function location of 0 means there is nothing to annotate against.
1757  if (getFunctionLoc(F) == 0)
1758  return false;
1759 
1760  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1761  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1762  }
1763 
// GUID set handed to the inliner and then to the weight propagation below.
1764  DenseSet<GlobalValue::GUID> InlinedGUIDs;
// NOTE(review): embedded source line 1765 is absent from this listing; it
// presumably holds the `if` that selects the priority-based inliner -- confirm.
1766  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1767  else
1768  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1769 
1770  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1771 
// Metadata is only emitted when inlining or propagation reported a change.
1772  if (Changed)
1773  generateMDProfMetadata(F);
1774 
// Coverage remarks are emitted regardless of Changed.
1775  emitCoverageRemarks(F);
1776  return Changed;
1777 }
1778 
/// Build the ProfiledCallGraph consumed by buildFunctionOrder().
///
/// The graph is constructed either from ContextTracker or from the reader's
/// profiles. Afterwards every defined function in \p CG that carries the
/// "use-sample-profile" attribute is added under its canonical name.
///
/// \param CG Static call graph whose nodes are scanned for functions to add.
///
/// \returns the newly created profiled call graph.
1779 std::unique_ptr<ProfiledCallGraph>
1780 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1781  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
// NOTE(review): embedded source line 1782 is absent from this listing; the
// `else` at line 1784 implies an `if (...)` choosing the ContextTracker-based
// graph belongs here -- confirm the condition against the original file.
1783  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1784  else
1785  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1786 
1787  // Add all functions into the profiled call graph even if they are not in
1788  // the profile. This makes sure functions missing from the profile still
1789  // get a chance to be processed.
1790  for (auto &Node : CG) {
1791  const auto *F = Node.first;
// Skip null entries, declarations, and functions not opted in to sample
// profiling.
1792  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1793  continue;
1794  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1795  }
1796 
1797  return ProfiledCG;
1798 }
1799 
/// Compute the order in which the functions of \p M are processed.
///
/// When top-down loading is disabled or \p CG is null, functions are returned
/// in module order and ProfileMergeInlinee is switched off. Otherwise the SCCs
/// of either the profiled call graph or the static call graph are walked and
/// the collected list is reversed before it is returned.
///
/// Only definitions carrying the "use-sample-profile" attribute are included.
///
/// \param M  Module whose functions are ordered.
/// \param CG Static call graph of \p M; may be null.
///
/// \returns the functions to process, in processing order.
1800 std::vector<Function *>
1801 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1802  std::vector<Function *> FunctionOrderList;
1803  FunctionOrderList.reserve(M.size());
1804 
// NOTE(review): embedded source line 1805 is absent from this listing; it
// presumably holds the condition guarding this warning -- confirm.
1806  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1807  "together with -sample-profile-top-down-load.\n";
1808 
1809  if (!ProfileTopDownLoad || CG == nullptr) {
1810  if (ProfileMergeInlinee) {
1811  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1812  // because the profile for a function may be used for the profile
1813  // annotation of its outline copy before the profile merging of its
1814  // non-inlined inline instances, and that is not the way how
1815  // ProfileMergeInlinee is supposed to work.
1816  ProfileMergeInlinee = false;
1817  }
1818 
// Fallback: plain module order, no call-graph-based sorting.
1819  for (Function &F : M)
1820  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1821  FunctionOrderList.push_back(&F);
1822  return FunctionOrderList;
1823  }
1824 
1825  assert(&CG->getModule() == &M);
1826 
// NOTE(review): embedded source lines 1827-1828 are absent from this listing;
// the `} else {` at line 1893 implies the `if (...) {` selecting the
// profiled-call-graph path opens here -- confirm the condition.
1829  // Use profiled call edges to augment the top-down order. There are cases
1830  // that the top-down order computed based on the static call graph doesn't
1831  // reflect real execution order. For example
1832  //
1833  // 1. Incomplete static call graph due to unknown indirect call targets.
1834  // Adjusting the order by considering indirect call edges from the
1835  // profile can enable the inlining of indirect call targets by allowing
1836  // the caller processed before them.
1837  // 2. Mutual call edges in an SCC. The static processing order computed for
1838  // an SCC may not reflect the call contexts in the context-sensitive
1839  // profile, thus may cause potential inlining to be overlooked. The
1840  // function order in one SCC is being adjusted to a top-down order based
1841  // on the profile to favor more inlining. This is only a problem with CS
1842  // profile.
1843  // 3. Transitive indirect call edges due to inlining. When a callee function
1844  // (say B) is inlined into a caller function (say A) in LTO prelink,
1845  // every call edge originated from the callee B will be transferred to
1846  // the caller A. If any transferred edge (say A->C) is indirect, the
1847  // original profiled indirect edge B->C, even if considered, would not
1848  // enforce a top-down order from the caller A to the potential indirect
1849  // call target C in LTO postlink since the inlined callee B is gone from
1850  // the static call graph.
1851  // 4. #3 can happen even for direct call targets, due to functions defined
1852  // in header files. A header function (say A), when included into source
1853  // files, is defined multiple times but only one definition survives due
1854  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1855  // definitions can be useless based on a local file scope. More
1856  // importantly, the inlinee (say B), once fully inlined to a
1857  // to-be-dropped A, will have no profile to consume when its outlined
1858  // version is compiled. This can lead to a profile-less prelink
1859  // compilation for the outlined version of B which may be called from
1860  // external modules. While this isn't easy to fix, we rely on the
1861  // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1862  // the A can be inlined in its local scope in prelink, it may not exist
1863  // in the merged IR in postlink, and we'll need the profiled call edges
1864  // to enforce a top-down order for the rest of the functions.
1865  //
1866  // Considering those cases, a profiled call graph completely independent of
1867  // the static call graph is constructed based on profile data, where
1868  // function objects are not even needed to handle case #3 and case #4.
1869  //
1870  // Note that static callgraph edges are completely ignored since they
1871  // can be conflicting with profiled edges for cyclic SCCs and may result in
1872  // an SCC order incompatible with profile-defined one. Using strictly
1873  // profile order ensures a maximum inlining experience. On the other hand,
1874  // static call edges are not so important when they don't correspond to a
1875  // context in the profile.
1876 
1877  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1878  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1879  while (!CGI.isAtEnd()) {
1880  auto Range = *CGI;
1881  if (SortProfiledSCC) {
1882  // Sort nodes in one SCC based on callsite hotness.
// NOTE(review): embedded source line 1883 is absent from this listing; `SI`
// used on the next line is presumably declared there -- confirm.
1884  Range = *SI;
1885  }
1886  for (auto *Node : Range) {
// Profiled-graph nodes are resolved back to IR functions by name; names
// with no (or a null) SymbolMap entry are skipped.
1887  Function *F = SymbolMap.lookup(Node->Name);
1888  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1889  FunctionOrderList.push_back(F);
1890  }
1891  ++CGI;
1892  }
1893  } else {
// NOTE(review): embedded source line 1894 is absent from this listing; `CGI`
// for the static call graph is presumably declared there -- confirm.
1895  while (!CGI.isAtEnd()) {
1896  for (CallGraphNode *Node : *CGI) {
1897  auto *F = Node->getFunction();
1898  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1899  FunctionOrderList.push_back(F);
1900  }
1901  ++CGI;
1902  }
1903  }
1904 
// The debug dump iterates the list reversed, i.e. in the order it will have
// after the std::reverse below.
1905  LLVM_DEBUG({
1906  dbgs() << "Function processing order:\n";
1907  for (auto F : reverse(FunctionOrderList)) {
1908  dbgs() << F->getName() << "\n";
1909  }
1910  });
1911 
// NOTE(review): presumably the SCC walk visits callees before callers, so the
// list is reversed to obtain the top-down order -- confirm against the
// scc_iterator documentation.
1912  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1913  return FunctionOrderList;
1914 }
1915 
/// Create the profile reader, read the profile, and configure loader state
/// for module \p M.
///
/// \returns false if the reader cannot be created, the profile cannot be
/// read, or a probe-based profile is used on a module that is not probed;
/// true otherwise. Each failure path reports a diagnostic through the
/// module's LLVMContext.
///
/// NOTE(review): this listing omits embedded source lines 1917, 1928, 1942,
/// 1953-1956, 1963, 1965, 1967-1968, 1970, 1972-1973, 1975, 1979, 1989 and
/// 1991. The rest of the signature, the right-hand side of the
/// ProfAccForSymsInList assignment, and the guards around several of the
/// assignments below are therefore not visible -- confirm against the
/// original file before relying on the control flow shown here.
1916 bool SampleProfileLoader::doInitialization(Module &M,
1918  auto &Ctx = M.getContext();
1919 
1920  auto ReaderOrErr = SampleProfileReader::create(
1921  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1922  if (std::error_code EC = ReaderOrErr.getError()) {
1923  std::string Msg = "Could not open profile: " + EC.message();
1924  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1925  return false;
1926  }
1927  Reader = std::move(ReaderOrErr.get());
1929  // Set module before reading the profile so reader may be able to only
1930  // read the function profiles which are used by the current module.
1931  Reader->setModule(&M);
1932  if (std::error_code EC = Reader->read()) {
1933  std::string Msg = "profile reading failed: " + EC.message();
1934  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1935  return false;
1936  }
1937 
1938  PSL = Reader->getProfileSymbolList();
1939 
1940  // While profile-sample-accurate is on, ignore symbol list.
1941  ProfAccForSymsInList =
1943  if (ProfAccForSymsInList) {
// Rebuild NamesInProfile from the reader's name table, if it has one.
1944  NamesInProfile.clear();
1945  if (auto NameTable = Reader->getNameTable())
1946  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1947  CoverageTracker.setProfAccForSymsInList(true);
1948  }
1949 
// An inline-replay advisor is only created when a FAM is available and a
// replay file was specified.
1950  if (FAM && !ProfileInlineReplayFile.empty()) {
1951  ExternalInlineAdvisor = getReplayInlineAdvisor(
1952  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
1957  /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
1958  }
1959 
1960  // Apply tweaks if context-sensitive or probe-based profile is available.
1961  if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
1962  Reader->profileIsProbeBased()) {
1964  UseIterativeBFIInference = true;
1966  SampleProfileUseProfi = true;
1969  // Enable priority-based inliner and size inline by default for CSSPGO.
1971  ProfileSizeInline = true;
1974  // For CSSPGO, we also allow recursive inline to best use context profile.
1976  AllowRecursiveInline = true;
1977 
1978  if (Reader->profileIsPreInlined()) {
1980  UsePreInlinerDecision = true;
1981  }
1982 
1983  if (!Reader->profileIsCS()) {
1984  // Non-CS profile should be fine without a function size budget for the
1985  // inliner since the contexts in the profile are either all from inlining
1986  // in the previous build or pre-computed by the preinliner with a size
1987  // cap, thus they are bounded.
1988  if (!ProfileInlineLimitMin.getNumOccurrences())
1990  if (!ProfileInlineLimitMax.getNumOccurrences())
1992  }
1993  }
1994 
1995  if (Reader->profileIsCS()) {
1996  // Tracker for profiles under different context
1997  ContextTracker = std::make_unique<SampleContextTracker>(
1998  Reader->getProfiles(), &GUIDToFuncNameMap);
1999  }
2000 
2001  // Load pseudo probe descriptors for probe-based function samples.
2002  if (Reader->profileIsProbeBased()) {
2003  ProbeManager = std::make_unique<PseudoProbeManager>(M);
2004  if (!ProbeManager->moduleIsProbed(M)) {
// Reported as a warning (DS_Warning), but initialization still fails.
2005  const char *Msg =
2006  "Pseudo-probe-based profile requires SampleProfileProbePass";
2007  Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2008  DS_Warning));
2009  return false;
2010  }
2011  }
2012 
2013  return true;
2014 }
2015 
/// Annotate every eligible function in \p M with the loaded sample profile.
///
/// Sets the module profile summary if none is present, accumulates
/// TotalCollectedSamples, populates SymbolMap, then calls runOnFunction() on
/// each function returned by buildFunctionOrder().
///
/// \param M    Module to process.
/// \param AM   Module analysis manager, forwarded to runOnFunction().
/// \param _PSI Profile summary info, stored in the PSI member.
/// \param CG   Call graph forwarded to buildFunctionOrder().
///
/// \returns true if any runOnFunction() call returned true.
2016 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2017  ProfileSummaryInfo *_PSI, CallGraph *CG) {
2018  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2019 
2020  PSI = _PSI;
// NOTE(review): embedded source line 2023 (the remaining argument(s) of the
// setProfileSummary call below) is absent from this listing.
2021  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2022  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2024  PSI->refresh();
2025  }
2026  // Compute the total number of samples collected in this profile.
2027  for (const auto &I : Reader->getProfiles())
2028  TotalCollectedSamples += I.second.getTotalSamples();
2029 
2030  auto Remapper = Reader->getRemapper();
2031  // Populate the symbol map.
2032  for (const auto &N_F : M.getValueSymbolTable()) {
2033  StringRef OrigName = N_F.getKey();
2034  Function *F = dyn_cast<Function>(N_F.getValue());
// Only named functions are recorded; other symbol-table values are skipped.
2035  if (F == nullptr || OrigName.empty())
2036  continue;
2037  SymbolMap[OrigName] = F;
// NOTE(review): embedded source line 2038 is absent from this listing;
// `NewName` used below is presumably defined there -- confirm.
2039  if (OrigName != NewName && !NewName.empty()) {
2040  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2041  // Failing to insert means there is already an entry in SymbolMap,
2042  // thus there are multiple functions that are mapped to the same
2043  // stripped name. In this case of name conflicting, set the value
2044  // to nullptr to avoid confusion.
2045  if (!r.second)
2046  r.first->second = nullptr;
2047  OrigName = NewName;
2048  }
2049  // Insert the remapped names into SymbolMap.
2050  if (Remapper) {
2051  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2052  if (*MapName != OrigName && !MapName->empty())
2053  SymbolMap.insert(std::make_pair(*MapName, F));
2054  }
2055  }
2056  }
2057  assert(SymbolMap.count(StringRef()) == 0 &&
2058  "No empty StringRef should be added in SymbolMap");
2059 
2060  bool retval = false;
2061  for (auto *F : buildFunctionOrder(M, CG)) {
2062  assert(!F->isDeclaration());
// Per-function state is reset before each function is processed.
2063  clearFunctionData();
2064  retval |= runOnFunction(*F, AM);
2065  }
2066 
2067  // Account for cold calls not inlined....
// NOTE(review): embedded source line 2068 is absent from this listing; it
// presumably holds a condition guarding the loop below -- confirm.
2069  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2070  notInlinedCallInfo)
2071  updateProfileCallee(pair.first, pair.second.entryCount);
2072 
2073  return retval;
2074 }
2075 
// NOTE(review): the signature (embedded source line 2076) is absent from this
// listing. Judging by the call `runOnFunction(*F, AM)` in runOnModule and the
// uses of `F` and `AM` below, this is the body of runOnFunction -- confirm.
// Embedded lines 2115, 2127, 2129 and 2135 are absent as well.
//
// The body picks an initial entry count for F, sets up ORE, looks up the
// samples for F, and returns emitAnnotations(F) when non-empty samples exist;
// otherwise it returns false.
2077  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2078  DILocation2SampleMap.clear();
2079  // By default the entry count is initialized to -1, which will be treated
2080  // conservatively by getEntryCount as the same as unknown (None). This is
2081  // to avoid newly added code being treated as cold. If we have samples
2082  // this will be overwritten in emitAnnotations.
2083  uint64_t initialEntryCount = -1;
2084 
2085  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2086  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2087  // Initialize all the function entry counts to 0. It means all the
2088  // functions without profile will be regarded as cold.
2089  initialEntryCount = 0;
2090  // profile-sample-accurate is a user assertion which has a higher precedence
2091  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2092  ProfAccForSymsInList = false;
2093  }
2094  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2095 
2096  // PSL -- profile symbol list includes all the symbols in sampled binary.
2097  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2098  // old functions without samples being cold, without having to worry
2099  // about new and hot functions being mistakenly treated as cold.
2100  if (ProfAccForSymsInList) {
2101  // Initialize the entry count to 0 for functions in the list.
2102  if (PSL->contains(F.getName()))
2103  initialEntryCount = 0;
2104 
2105  // Function in the symbol list but without sample will be regarded as
2106  // cold. To minimize the potential negative performance impact it could
2107  // have, we want to be a little conservative here saying if a function
2108  // shows up in the profile, no matter as outline function, inline instance
2109  // or call targets, treat the function as not being cold. This will handle
2110  // the cases such as most callsites of a function are inlined in sampled
2111  // binary but not inlined in current build (because of source code drift,
2112  // imprecise debug information, or the callsites are all cold individually
2113  // but not cold accumulatively...), so the outline function showing up as
2114  // cold in sampled binary will actually not be cold after current build.
// NOTE(review): embedded source line 2115 is absent from this listing;
// `CanonName` used below is presumably defined there -- confirm.
2116  if (NamesInProfile.count(CanonName))
2117  initialEntryCount = -1;
2118  }
2119 
2120  // Initialize entry count when the function has no existing entry
2121  // count value.
2122  if (!F.getEntryCount())
2123  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
// OwnedORE keeps a locally created remark emitter alive for this call when
// no analysis manager is supplied.
2124  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2125  if (AM) {
2126  auto &FAM =
2128  .getManager();
2130  } else {
2131  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2132  ORE = OwnedORE.get();
2133  }
2134 
// NOTE(review): embedded source line 2135 is absent from this listing; the
// `else` at line 2137 implies an `if (...)` selecting the ContextTracker
// lookup belongs here -- confirm.
2136  Samples = ContextTracker->getBaseSamplesFor(F);
2137  else
2138  Samples = Reader->getSamplesFor(F);
2139 
// Functions with no samples, or empty samples, are left unannotated.
2140  if (Samples && !Samples->empty())
2141  return emitAnnotations(F);
2142  return false;
2143 }
2144 
// NOTE(review): the first signature line (embedded source line 2145) is absent
// from this listing. Embedded lines 2147-2148, 2157 and 2169-2170 are absent
// too, so the definitions of `FAM`, `PSI` and `CG` and the body of the GetTLI
// lambda are not visible -- confirm against the original file.
//
// The body constructs a SampleProfileLoader and returns
// PreservedAnalyses::all() when either doInitialization() or runOnModule()
// returns false, and PreservedAnalyses::none() otherwise.
2146  ModuleAnalysisManager &AM) {
2149 
// Accessors handed to the loader; the two visible bodies fetch their results
// from FAM.
2150  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2151  return FAM.getResult<AssumptionAnalysis>(F);
2152  };
2153  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2154  return FAM.getResult<TargetIRAnalysis>(F);
2155  };
2156  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2158  };
2159 
// The pass's own file names take precedence; when one is empty the
// corresponding SampleProfileFile / SampleProfileRemappingFile value is used.
2160  SampleProfileLoader SampleLoader(
2161  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2162  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2163  : ProfileRemappingFileName,
2164  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2165 
2166  if (!SampleLoader.doInitialization(M, &FAM))
2167  return PreservedAnalyses::all();
2168 
2171  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2172  return PreservedAnalyses::all();
2173 
2174  return PreservedAnalyses::none();
2175 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AnnotateSampleProfileInlinePhase
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
Instrumentation.h
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:915
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2570
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2145
EnableExtTspBlockPlacement
cl::opt< bool > EnableExtTspBlockPlacement
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
llvm::sampleprof::ContextDuplicatedIntoBase
@ ContextDuplicatedIntoBase
Definition: SampleProf.h:444
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1107
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1109
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:474
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:581
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:98
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1958
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:81
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1583
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
MapVector.h
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
Definition: InlineAdvisor.cpp:500
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1557
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:443
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:804
Module.h
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:124
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:203
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:80
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:976
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:1006
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:703
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:500
llvm::scc_member_iterator
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:252
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1428
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:230
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:74
llvm::sampleprof::FunctionSamples::SetContextSynthetic
void SetContextSynthetic()
Definition: SampleProf.h:808
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:803
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:1009
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1497
GlobalValue.h
DisableSampleLoaderInlining
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:271
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:477
SortProfiledSCC
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2821
InlinePriorityMode::Cost
@ Cost
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:80
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition: InlineCost.h:119
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:434
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:89
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1817
TargetLibraryInfo.h
llvm::sampleprof::FunctionSamples::getHeadSamplesEstimate
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:891
DenseSet.h
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1133
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:97
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:746
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:815
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3100
SampleProfileLoaderBaseUtil.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::InlineContext
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:488
llvm::sampleprof::SampleContext
Definition: SampleProf.h:502
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:110
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:873
ValueSymbolTable.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1538
SampleProfile.h
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1400
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:427
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:693
ProfiledCallGraph.h
uint64_t
ProfileSummaryInfo.h
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:650
MisExpect.h
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:398
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:711
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:408
llvm::misexpect::checkExpectAnnotations
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ReplayInlinerSettings::Scope::Module
@ Module
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1666
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1118
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:99
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1859
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:128
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:496
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:291
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::CallSiteFormat::Format::Line
@ Line
llvm::MapVector::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:118
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MapVector::lookup
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:111
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:482
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:668
CallPromotionUtils.h
llvm::ContextTrieNode
Definition: SampleContextTracker.h:35
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:654
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1017
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:33
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:466
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::InlinePass::ReplaySampleProfileInliner
@ ReplaySampleProfileInliner
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:441
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:408
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:589
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:68
DiagnosticInfo.h
Function.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:225
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:203
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:342
PseudoProbe.h
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:936
llvm::cl::value_desc
Definition: CommandLine.h:422
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:596
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:502
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:101
llvm::sampleprof::SampleProfileReader::profileIsPreInlined
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Definition: SampleProfReader.h:480
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
llvm::InlinePass::SampleProfileInliner
@ SampleProfileInliner
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1113
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:244
llvm::MDBuilder
Definition: MDBuilder.h:36
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:112
CallGraph.h
llvm::AnnotateInlinePassName
std::string AnnotateInlinePassName(InlineContext IC)
Definition: InlineAdvisor.cpp:578
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:24
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1017
Instructions.h
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:399
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:354
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1563
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:931
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1062
StringMap.h
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:236
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::InlineResult
InlineResult is basically true or false.
Definition: InlineCost.h:177
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:449
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:43
llvm::MapVector::erase
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:174
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
Definition: ReplayInlineAdvisor.cpp:80
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:767
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:923
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:791