LLVM  15.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/PriorityQueue.h"
29 #include "llvm/ADT/SCCIterator.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/MDBuilder.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/PseudoProbe.h"
60 #include "llvm/InitializePasses.h"
61 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
70 #include "llvm/Transforms/IPO.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <cstdint>
82 #include <functional>
83 #include <limits>
84 #include <map>
85 #include <memory>
86 #include <queue>
87 #include <string>
88 #include <system_error>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 using namespace sampleprof;
94 using namespace llvm::sampleprofutil;
96 #define DEBUG_TYPE "sample-profile"
97 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
98 
// Pass statistics. Each STATISTIC entry declares a counter together with the
// human-readable description given as its second argument.
99 STATISTIC(NumCSInlined,
100  "Number of functions inlined with context sensitive profile");
101 STATISTIC(NumCSNotInlined,
102  "Number of functions not inlined with context sensitive profile");
103 STATISTIC(NumMismatchedProfile,
104  "Number of functions with CFG mismatched profile");
105 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
106 STATISTIC(NumDuplicatedInlinesite,
107  "Number of inlined callsites with a partial distribution factor");
108 
// Counters for the three size limits (min, max, growth) that can stop inlining.
109 STATISTIC(NumCSInlinedHitMinLimit,
110  "Number of functions with FDO inline stopped due to min size limit");
111 STATISTIC(NumCSInlinedHitMaxLimit,
112  "Number of functions with FDO inline stopped due to max size limit");
113 STATISTIC(
114  NumCSInlinedHitGrowthLimit,
115  "Number of functions with FDO inline stopped due to growth size limit");
116 
117 // Command line option to specify the file to read samples from. This is
118 // mainly used for debugging.
120  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
121  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
122 
123 // The named file contains a set of transformations that may have been applied
124 // to the symbol names between the program from which the sample data was
125 // collected and the current program's symbols.
127  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
128  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
129 
131  "profile-sample-accurate", cl::Hidden, cl::init(false),
132  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
133  "callsite and function as having 0 samples. Otherwise, treat "
134  "un-sampled callsites and functions conservatively as unknown. "));
135 
137  "profile-sample-block-accurate", cl::Hidden, cl::init(false),
138  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
139  "branches and calls as having 0 samples. Otherwise, treat "
140  "them conservatively as unknown. "));
141 
143  "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
144  cl::init(true),
145  cl::desc("For symbols in profile symbol list, regard their profiles to "
146  "be accurate. It may be overriden by profile-sample-accurate. "));
147 
149  "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
150  cl::desc("Merge past inlinee's profile to outline version if sample "
151  "profile loader decided not to inline a call site. It will "
152  "only be enabled when top-down order of profile loading is "
153  "enabled. "));
154 
156  "sample-profile-top-down-load", cl::Hidden, cl::init(true),
157  cl::desc("Do profile annotation and inlining for functions in top-down "
158  "order of call graph during sample profile loading. It only "
159  "works for new pass manager. "));
160 
// Hidden boolean option, enabled by default. Per its description it only has
// an effect when -sample-profile-top-down-load is also on.
161 static cl::opt<bool>
162  UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
163  cl::desc("Process functions in a top-down order "
164  "defined by the profiled call graph when "
165  "-sample-profile-top-down-load is on."));
167  SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
168  cl::desc("Sort profiled recursion by edge weights."));
169 
171  "sample-profile-inline-size", cl::Hidden, cl::init(false),
172  cl::desc("Inline cold call sites in profile loader if it's beneficial "
173  "for code size."));
174 
175 // Since profiles are consumed by many passes, turning on this option has
176 // side effects. For instance, pre-link SCC inliner would see merged profiles
177 // and inline the hot functions (that are skipped in this pass).
179  "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
180  cl::desc("If true, artifically skip inline transformation in sample-loader "
181  "pass, and merge (or scale) profiles (as configured by "
182  "--sample-profile-merge-inlinee)."));
183 
185  "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
186  cl::desc("The size growth ratio limit for proirity-based sample profile "
187  "loader inlining."));
188 
190  "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
191  cl::desc("The lower bound of size growth limit for "
192  "proirity-based sample profile loader inlining."));
193 
195  "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
196  cl::desc("The upper bound of size growth limit for "
197  "proirity-based sample profile loader inlining."));
198 
200  "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
201  cl::desc("Hot callsite threshold for proirity-based sample profile loader "
202  "inlining."));
203 
205  "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
206  cl::desc("Threshold for inlining cold callsites"));
207 
209  "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
210  cl::desc(
211  "Relative hotness percentage threshold for indirect "
212  "call promotion in proirity-based sample profile loader inlining."));
213 
215  "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
216  cl::desc(
217  "Skip relative hotness check for ICP up to given number of targets."));
218 
220  "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
221  cl::init(false),
222  cl::desc("Use call site prioritized inlining for sample profile loader."
223  "Currently only CSSPGO is supported."));
224 
226  "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
227  cl::init(false),
228  cl::desc("Use the preinliner decisions stored in profile context."));
229 
231  "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
232  cl::init(false),
233  cl::desc("Allow sample loader inliner to inline recursive calls."));
234 
236  "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
237  cl::desc(
238  "Optimization remarks file containing inline remarks to be replayed "
239  "by inlining from sample profile loader."),
240  cl::Hidden);
241 
243  "sample-profile-inline-replay-scope",
246  "Replay on functions that have remarks associated "
247  "with them (default)"),
249  "Replay on the entire module")),
250  cl::desc("Whether inline replay should be applied to the entire "
251  "Module or just the Functions (default) that are present as "
252  "callers in remarks during sample profile inlining."),
253  cl::Hidden);
254 
256  "sample-profile-inline-replay-fallback",
258  cl::values(
259  clEnumValN(
261  "All decisions not in replay send to original advisor (default)"),
263  "AlwaysInline", "All decisions not in replay are inlined"),
265  "All decisions not in replay are not inlined")),
266  cl::desc("How sample profile inline replay treats sites that don't come "
267  "from the replay. Original: defers to original advisor, "
268  "AlwaysInline: inline all sites not in replay, NeverInline: "
269  "inline no sites not in replay"),
270  cl::Hidden);
271 
273  "sample-profile-inline-replay-format",
275  cl::values(
276  clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
278  "<Line Number>:<Column Number>"),
280  "LineDiscriminator", "<Line Number>.<Discriminator>"),
282  "LineColumnDiscriminator",
283  "<Line Number>:<Column Number>.<Discriminator> (default)")),
284  cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
285 
// Upper bound on promotions per indirect call site (default 3). It is also
// used below as the capacity of the value-profile buffers, and
// updateIDTMetaData returns immediately when it is 0.
// NOTE(review): original listing line 288 is absent from this extract.
286 static cl::opt<unsigned>
287  MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
289  cl::desc("Max number of promotions for a single indirect "
290  "call callsite in sample profile loader"));
291 
293  "overwrite-existing-weights", cl::Hidden, cl::init(false),
294  cl::desc("Ignore existing branch weights on IR and always overwrite."));
295 
297 
298 namespace {
299 
// 64-bit value keyed by basic block (a block weight, going by the name).
300 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Basic block -> basic block; presumably each block's equivalence-class
// representative -- confirm against the users of this alias.
301 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
// Ordered pair of basic blocks (presumably source then destination -- confirm).
302 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// 64-bit value keyed by Edge (an edge weight, going by the name).
303 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
304 using BlockEdgeMap =
306 
// Scoped helper that fills a caller-owned GUID -> function-name map from the
// functions of a module and attaches it to every FunctionSamples held by the
// reader. The destructor clears the map and detaches it again. Both the
// constructor and the destructor do nothing unless the reader reports
// useMD5().
307 class GUIDToFuncNameMapper {
308 public:
309  GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
310  DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
311  : CurrentReader(Reader), CurrentModule(M),
312  CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
313  if (!CurrentReader.useMD5())
314  return;
315 
316  for (const auto &F : CurrentModule) {
317  StringRef OrigName = F.getName();
// insert() leaves an already-present GUID entry untouched.
318  CurrentGUIDToFuncNameMap.insert(
319  {Function::getGUID(OrigName), OrigName});
320 
321  // Local to global var promotion used by optimization like thinlto
322  // will rename the var and add suffix like ".llvm.xxx" to the
323  // original local name. In sample profile, the suffixes of function
324  // names are all stripped. Since it is possible that the mapper is
325  // built in post-thin-link phase and var promotion has been done,
326  // we need to add the substring of function name without the suffix
327  // into the GUIDToFuncNameMap.
// NOTE(review): the declaration of CanonName (original listing line 328) is
// absent from this extract; only its use is visible.
329  if (CanonName != OrigName)
330  CurrentGUIDToFuncNameMap.insert(
331  {Function::getGUID(CanonName), CanonName});
332  }
333 
334  // Update GUIDToFuncNameMap for each function including inlinees.
335  SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
336  }
337 
338  ~GUIDToFuncNameMapper() {
339  if (!CurrentReader.useMD5())
340  return;
341 
342  CurrentGUIDToFuncNameMap.clear();
343 
344  // Reset GUIDToFuncNameMap for each function as they're no
345  // longer valid at this point.
346  SetGUIDToFuncNameMapForAll(nullptr);
347  }
348 
349 private:
// Points the GUIDToFuncNameMap member of every FunctionSamples reachable from
// the reader's profiles at Map (which may be nullptr). A FIFO work queue is
// used to visit the top-level profiles and then, transitively, the samples
// nested under their callsites.
350  void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
351  std::queue<FunctionSamples *> FSToUpdate;
352  for (auto &IFS : CurrentReader.getProfiles()) {
353  FSToUpdate.push(&IFS.second);
354  }
355 
356  while (!FSToUpdate.empty()) {
357  FunctionSamples *FS = FSToUpdate.front();
358  FSToUpdate.pop();
359  FS->GUIDToFuncNameMap = Map;
360  for (const auto &ICS : FS->getCallsiteSamples()) {
361  const FunctionSamplesMap &FSMap = ICS.second;
362  for (auto &IFS : FSMap) {
// FSMap is reached through a const reference, hence the const_cast. Note that
// this reference named FS shadows the outer pointer FS inside this loop body.
363  FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
364  FSToUpdate.push(&FS);
365  }
366  }
367  }
368  }
369 
// All three members are references: the reader, module and map must outlive
// this object.
370  SampleProfileReader &CurrentReader;
371  Module &CurrentModule;
372  DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
373 };
374 
375 // Inline candidate used by iterative callsite prioritized inliner.
// Plain aggregate; none of the fields has a default member initializer, so
// whoever creates a candidate must fill in every field.
376 struct InlineCandidate {
// The call instruction this candidate refers to.
377  CallBase *CallInstr;
// Profile samples associated with the callee. CandidateComparer asserts this
// is non-null when it has to break a tie between equal CallsiteCount values.
378  const FunctionSamples *CalleeSamples;
379  // Prorated callsite count, which will be used to guide inlining. For example,
380  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
381  // copies will get their own distribution factors and their prorated counts
382  // will be used to decide if they should be inlined independently.
383  uint64_t CallsiteCount;
384  // Call site distribution factor to prorate the profile samples for a
385  // duplicated callsite. Default value is 1.0.
// NOTE(review): the 1.0 default is not expressed in this declaration; it has
// to be supplied where the struct is populated.
386  float CallsiteDistribution;
387 };
388 
389 // Inline candidate comparer using call site weight
390 struct CandidateComparer {
391  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
392  if (LHS.CallsiteCount != RHS.CallsiteCount)
393  return LHS.CallsiteCount < RHS.CallsiteCount;
394 
395  const FunctionSamples *LCS = LHS.CalleeSamples;
396  const FunctionSamples *RCS = RHS.CalleeSamples;
397  assert(LCS && RCS && "Expect non-null FunctionSamples");
398 
399  // Tie breaker using number of samples try to favor smaller functions first
400  if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
401  return LCS->getBodySamples().size() > RCS->getBodySamples().size();
402 
403  // Tie breaker using GUID so we have stable/deterministic inlining order
404  return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
405  }
406 };
407 
408 using CandidateQueue =
410  CandidateComparer>;
411 
412 /// Sample profile pass.
413 ///
414 /// This pass reads profile data from the file specified by
415 /// -sample-profile-file and annotates every affected function with the
416 /// profile information found in that file.
// NOTE(review): this listing extract drops several lines of the class (gaps in
// the embedded numbering, e.g. 425, 435, 446, 471, 481, 483-484, 513), so some
// declarations below appear incomplete.
417 class SampleProfileLoader final
418  : public SampleProfileLoaderBaseImpl<BasicBlock> {
419 public:
// The three std::function callbacks are taken by value and moved into the
// GetAC / GetTTI / GetTLI members.
420  SampleProfileLoader(
421  StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
422  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
423  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
424  std::function<const TargetLibraryInfo &(Function &)> GetTLI)
426  GetAC(std::move(GetAssumptionCache)),
427  GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
428  LTOPhase(LTOPhase) {}
429 
430  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
431  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
432  ProfileSummaryInfo *_PSI, CallGraph *CG);
433 
434 protected:
436  bool emitAnnotations(Function &F);
437  ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
438  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
439  const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
440  const FunctionSamples *
441  findFunctionSamples(const Instruction &I) const override;
// Sum is an output parameter (see the definition's documentation).
442  std::vector<const FunctionSamples *>
443  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
444  void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
445  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
447  uint64_t Threshold);
448  // Attempt to promote indirect call and also inline the promoted call
449  bool tryPromoteAndInlineCandidate(
450  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
451  uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
452 
453  bool inlineHotFunctions(Function &F,
454  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
455  Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
456  bool getExternalInlineAdvisorShouldInline(CallBase &CB);
457  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
458  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
459  bool
460  tryInlineCandidate(InlineCandidate &Candidate,
461  SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
462  bool
463  inlineHotFunctionsWithPriority(Function &F,
464  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
465  // Inline cold/small functions in addition to hot ones
466  bool shouldInlineColdCallee(CallBase &CallInst);
467  void emitOptimizationRemarksForInlineCandidates(
468  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
469  bool Hot);
470  void promoteMergeNotInlinedContextSamples(
472  const Function &F);
473  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
474  std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
475  void generateMDProfMetadata(Function &F);
476 
477  /// Map from function name to Function *. Used to find the function from
478  /// the function name. If the function name contains suffix, additional
479  /// entry is added to map from the stripped name to the function if there
480  /// is one-to-one mapping.
// NOTE(review): the member this comment documents (original listing line 481)
// is absent from this extract.
482 
485  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
486 
487  /// Profile tracker for different context.
488  std::unique_ptr<SampleContextTracker> ContextTracker;
489 
490  /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
491  ///
492  /// We need to know the LTO phase because for example in ThinLTOPrelink
493  /// phase, in annotation, we should not promote indirect calls. Instead,
494  /// we will mark GUIDs that needs to be annotated to the function.
495  ThinOrFullLTOPhase LTOPhase;
496 
497  /// Profile symbol list tells whether a function name appears in the binary
498  /// used to generate the current profile.
499  std::unique_ptr<ProfileSymbolList> PSL;
500 
501  /// Total number of samples collected in this profile.
502  ///
503  /// This is the sum of all the samples collected in all the functions executed
504  /// at runtime.
505  uint64_t TotalCollectedSamples = 0;
506 
507  // Information recorded when we declined to inline a call site
508  // because we have determined it is too cold is accumulated for
509  // each callee function. Initially this is just the entry count.
510  struct NotInlinedProfileInfo {
511  uint64_t entryCount;
512  };
// NOTE(review): original listing line 513 (presumably the container holding
// NotInlinedProfileInfo values) is absent from this extract.
514 
515  // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
516  // all the function symbols defined or declared in current module.
517  DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
518 
519  // All the Names used in FunctionSamples including outline function
520  // names, inline instance names and call target names.
521  StringSet<> NamesInProfile;
522 
523  // For symbol in profile symbol list, whether to regard their profiles
524  // to be accurate. It is mainly decided by existence of profile symbol
525  // list and -profile-accurate-for-symsinlist flag, but it can be
526  // overridden by -profile-sample-accurate or profile-sample-accurate
527  // attribute.
// No in-class initializer: the value must be assigned before it is read.
528  bool ProfAccForSymsInList;
529 
530  // External inline advisor used to replay inline decision from remarks.
531  std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
532 
533  // A pseudo probe helper to correlate the imported sample counts.
534  std::unique_ptr<PseudoProbeManager> ProbeManager;
535 };
536 
// ModulePass wrapper that owns a SampleProfileLoader and forwards
// doInitialization() and dump() to it.
// NOTE(review): several listing lines of this class are absent from this
// extract (constructor parameters at 543-544, constructor body at 556-557,
// getAnalysisUsage body at 570-573).
537 class SampleProfileLoaderLegacyPass : public ModulePass {
538 public:
539  // Class identification, replacement for typeinfo
540  static char ID;
541 
542  SampleProfileLoaderLegacyPass(
// The three callbacks capture by reference and dereference ACT / TTIWP / TLIWP
// only when invoked. Those members start out as nullptr (see below), so they
// must be assigned before the loader calls any of the callbacks; where that
// happens is not visible in this extract.
545  : ModulePass(ID), SampleLoader(
546  Name, SampleProfileRemappingFile, LTOPhase,
547  [&](Function &F) -> AssumptionCache & {
548  return ACT->getAssumptionCache(F);
549  },
550  [&](Function &F) -> TargetTransformInfo & {
551  return TTIWP->getTTI(F);
552  },
553  [&](Function &F) -> TargetLibraryInfo & {
554  return TLIWP->getTLI(F);
555  }) {
558  }
559 
560  void dump() { SampleLoader.dump(); }
561 
562  bool doInitialization(Module &M) override {
// Uses the default (nullptr) FunctionAnalysisManager argument of the loader.
563  return SampleLoader.doInitialization(M);
564  }
565 
566  StringRef getPassName() const override { return "Sample profile pass"; }
567  bool runOnModule(Module &M) override;
568 
569  void getAnalysisUsage(AnalysisUsage &AU) const override {
574  }
575 
576 private:
577  SampleProfileLoader SampleLoader;
// Analysis wrappers used by the callbacks above; initially null.
578  AssumptionCacheTracker *ACT = nullptr;
579  TargetTransformInfoWrapperPass *TTIWP = nullptr;
580  TargetLibraryInfoWrapperPass *TLIWP = nullptr;
581 };
582 
583 } // end anonymous namespace
584 
// Returns the sample weight to use for Inst, or a default-constructed
// std::error_code when the instruction is to be ignored (no debug location;
// branch, intrinsic or PHI instruction). Otherwise defers to
// getInstWeightImpl(), except for the direct-call case described below.
585 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// NOTE(review): the condition guarding this return (original listing line 586)
// is absent from this extract.
587  return getProbeWeight(Inst);
588 
589  const DebugLoc &DLoc = Inst.getDebugLoc();
590  if (!DLoc)
591  return std::error_code();
592 
593  // Ignore all intrinsics, phinodes and branch instructions.
594  // Branch and phinodes instruction usually contains debug info from sources
595  // outside of the residing basic block, thus we ignore them during annotation.
596  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
597  return std::error_code();
598 
599  // For non-CS profile, if a direct call/invoke instruction is inlined in
600  // profile (findCalleeFunctionSamples returns non-empty result), but not
601  // inlined here, it means that the inlined callsite has no sample, thus the
602  // call instruction should have 0 count.
603  // For CS profile, the callsite count of previously inlined callees is
604  // populated with the entry count of the callees.
// NOTE(review): original listing line 605 (presumably the non-CS condition the
// comment above refers to) is absent from this extract.
606  if (const auto *CB = dyn_cast<CallBase>(&Inst))
607  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
608  return 0;
609 
610  return getInstWeightImpl(Inst);
611 }
612 
613 // Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
614 // of non-probe instruction. So if all instructions of the BB give error_code,
615 // tell the inference algorithm to infer the BB weight.
//
// Result summary, as visible below:
//  - Inst carries no probe                    -> error_code
//  - no FunctionSamples found for Inst        -> 0
//  - direct call with callee samples          -> 0 (subject to a condition on a
//                                                 line missing from this extract)
//  - samples found for the probe id           -> samples scaled by Probe->Factor
//  - otherwise                                -> the lookup result R, unchanged
616 ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// NOTE(review): original listing line 617 (the opening of the statement that
// this string literal belongs to, presumably an assert) is absent from this
// extract.
618  "Profile is not pseudo probe based");
619  Optional<PseudoProbe> Probe = extractProbe(Inst);
620  // Ignore the non-probe instruction. If none of the instruction in the BB is
621  // probe, we choose to infer the BB's weight.
622  if (!Probe)
623  return std::error_code();
624 
625  const FunctionSamples *FS = findFunctionSamples(Inst);
626  // If none of the instruction has FunctionSample, we choose to return zero
627  // value sample to indicate the BB is cold. This could happen when the
628  // instruction is from inlinee and no profile data is found.
629  // FIXME: This should not be affected by the source drift issue as 1) if the
630  // newly added function is top-level inliner, it won't match the CFG checksum
631  // in the function profile or 2) if it's the inlinee, the inlinee should have
632  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
633  // we can improve it by adding a switch for profile-sample-block-accurate for
634  // block level counts in the future.
635  if (!FS)
636  return 0;
637 
638  // For non-CS profile, If a direct call/invoke instruction is inlined in
639  // profile (findCalleeFunctionSamples returns non-empty result), but not
640  // inlined here, it means that the inlined callsite has no sample, thus the
641  // call instruction should have 0 count.
642  // For CS profile, the callsite count of previously inlined callees is
643  // populated with the entry count of the callees.
// NOTE(review): original listing line 644 (presumably the non-CS condition the
// comment above refers to) is absent from this extract.
645  if (const auto *CB = dyn_cast<CallBase>(&Inst))
646  if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
647  return 0;
648 
// Look up the samples recorded for this probe id (second argument fixed at 0).
649  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
650  if (R) {
// Scale the raw count by the probe's factor. Factor appears to be floating
// point (it is printed with "%0.2f" below), so storing the product in a
// uint64_t discards any fractional part.
651  uint64_t Samples = R.get() * Probe->Factor;
// Emit the "AppliedSamples" remark only when markSamplesUsed() reports true
// for this (FS, probe id) pair.
652  bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
653  if (FirstMark) {
654  ORE->emit([&]() {
655  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
656  Remark << "Applied " << ore::NV("NumSamples", Samples);
657  Remark << " samples from profile (ProbeId=";
658  Remark << ore::NV("ProbeId", Probe->Id);
659  Remark << ", Factor=";
660  Remark << ore::NV("Factor", Probe->Factor);
661  Remark << ", OriginalSamples=";
662  Remark << ore::NV("OriginalSamples", R.get());
663  Remark << ")";
664  return Remark;
665  });
666  }
// NOTE(review): the debug text below ends with ")" although no "(" is printed.
667  LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
668  << " - weight: " << R.get() << " - factor: "
669  << format("%0.2f", Probe->Factor) << ")\n");
670  return Samples;
671  }
// No samples for this probe: hand the failed lookup result back to the caller.
672  return R;
673 }
674 
675 /// Get the FunctionSamples for a call instruction.
676 ///
677 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
678 /// instance in which that call instruction is calling to. It contains
679 /// all samples that resides in the inlined instance. We first find the
680 /// inlined instance in which the call instruction is from, then we
681 /// traverse its children to find the callsite with the matching
682 /// location.
683 ///
684 /// \param Inst Call/Invoke instruction to query.
685 ///
686 /// \returns The FunctionSamples pointer to the inlined instance.
///
/// Returns nullptr when \p Inst has no debug location, or when no
/// FunctionSamples is found for the instruction itself.
687 const FunctionSamples *
688 SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
689  const DILocation *DIL = Inst.getDebugLoc();
690  if (!DIL) {
691  return nullptr;
692  }
693 
// CalleeName stays empty when getCalledFunction() yields null.
694  StringRef CalleeName;
695  if (Function *Callee = Inst.getCalledFunction())
696  CalleeName = Callee->getName();
697 
// NOTE(review): the condition guarding this return (original listing line 698)
// is absent from this extract.
699  return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
700 
701  const FunctionSamples *FS = findFunctionSamples(Inst);
702  if (FS == nullptr)
703  return nullptr;
704 
// Look the callee up among FS's callsite samples, keyed by the call site
// identifier derived from the debug location and the callee name.
705  return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
706  CalleeName, Reader->getRemapper());
707 }
708 
709 /// Returns a vector of FunctionSamples that are the indirect call targets
710 /// of \p Inst. The vector is sorted by the total number of samples. Stores
711 /// the total call count of the indirect call in \p Sum.
///
/// The ordering actually applied is by descending entry samples, with ties
/// broken by ascending GUID of the function name (see FSCompare below).
/// \p Sum is left untouched on the early returns taken before it is reset
/// (no debug location; empty context-tracker result; no FunctionSamples).
712 std::vector<const FunctionSamples *>
713 SampleProfileLoader::findIndirectCallFunctionSamples(
714  const Instruction &Inst, uint64_t &Sum) const {
715  const DILocation *DIL = Inst.getDebugLoc();
716  std::vector<const FunctionSamples *> R;
717 
718  if (!DIL) {
719  return R;
720  }
721 
// Note: the lambda parameter R shadows the result vector R inside the lambda.
722  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
723  assert(L && R && "Expect non-null FunctionSamples");
724  if (L->getEntrySamples() != R->getEntrySamples())
725  return L->getEntrySamples() > R->getEntrySamples();
726  return FunctionSamples::getGUID(L->getName()) <
727  FunctionSamples::getGUID(R->getName());
728  };
729 
// NOTE(review): the statement opening this scope (original listing line 730,
// presumably the context-sensitive-profile check) is absent from this extract.
731  auto CalleeSamples =
732  ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
733  if (CalleeSamples.empty())
734  return R;
735 
736  // For CSSPGO, we only use target context profile's entry count
737  // as that already includes both inlined callee and non-inlined ones.
738  Sum = 0;
739  for (const auto *const FS : CalleeSamples) {
740  Sum += FS->getEntrySamples();
741  R.push_back(FS);
742  }
743  llvm::sort(R, FSCompare);
744  return R;
745  }
746 
747  const FunctionSamples *FS = findFunctionSamples(Inst);
748  if (FS == nullptr)
749  return R;
750 
// NOTE(review): the declaration of CallSite (original listing line 751) is
// absent from this extract.
752  auto T = FS->findCallTargetMapAt(CallSite);
753  Sum = 0;
// Sum accumulates the counts of the call targets recorded at this call site...
754  if (T)
755  for (const auto &T_C : T.get())
756  Sum += T_C.second;
// ...plus the entry samples of every FunctionSamples recorded at the call
// site, which are also the entries returned in R.
757  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
758  if (M->empty())
759  return R;
760  for (const auto &NameFS : *M) {
761  Sum += NameFS.second.getEntrySamples();
762  R.push_back(&NameFS.second);
763  }
764  llvm::sort(R, FSCompare);
765  }
766  return R;
767 }
768 
// Returns the FunctionSamples that applies to Inst. Results are memoized per
// DILocation in DILocation2SampleMap, so each location is resolved only once.
// Returns Samples when Inst has no debug location.
769 const FunctionSamples *
770 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
// NOTE(review): the statement opening this scope (original listing line 771,
// presumably the probe-based-profile check) is absent from this extract.
772  Optional<PseudoProbe> Probe = extractProbe(Inst);
773  if (!Probe)
774  return nullptr;
775  }
776 
777  const DILocation *DIL = Inst.getDebugLoc();
778  if (!DIL)
779  return Samples;
780 
// try_emplace inserts a nullptr placeholder only if DIL is not cached yet;
// it.second tells whether the insertion happened, i.e. whether the lookup
// still has to be performed. This method is const, so the cache is presumably
// a mutable member -- confirm at its declaration.
781  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
782  if (it.second) {
// NOTE(review): the condition selecting between the two lookups (original
// listing line 783) is absent from this extract.
784  it.first->second = ContextTracker->getContextSamplesFor(DIL);
785  else
786  it.first->second =
787  Samples->findFunctionSamples(DIL, Reader->getRemapper());
788  }
789  return it.first->second;
790 }
791 
792 /// Check whether the indirect call promotion history of \p Inst allows
793 /// the promotion for \p Candidate.
794 /// If the profile count for the promotion candidate \p Candidate is
795 /// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
796 /// for \p Inst. If we already have at least MaxNumPromotions
797 /// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
798 /// cannot promote for \p Inst anymore.
799 static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
800  uint32_t NumVals = 0;
801  uint64_t TotalCount = 0;
802  std::unique_ptr<InstrProfValueData[]> ValueData =
803  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
804  bool Valid =
805  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
806  ValueData.get(), NumVals, TotalCount, true);
807  // No valid value profile so no promoted targets have been recorded
808  // before. Ok to do ICP.
809  if (!Valid)
810  return true;
811 
812  unsigned NumPromoted = 0;
813  for (uint32_t I = 0; I < NumVals; I++) {
814  if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
815  continue;
816 
817  // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
818  // metadata, it means the candidate has been promoted for this
819  // indirect call.
820  if (ValueData[I].Value == Function::getGUID(Candidate))
821  return false;
822  NumPromoted++;
823  // If already have MaxNumPromotions promotion, don't do it anymore.
824  if (NumPromoted == MaxNumPromotions)
825  return false;
826  }
827  return true;
828 }
829 
830 /// Update indirect call target profile metadata for \p Inst.
831 /// Usually \p Sum is the sum of counts of all the targets for \p Inst.
832 /// If it is 0, it means updateIDTMetaData is used to mark a
833 /// certain target to be promoted already. If it is not zero,
834 /// we expect to use it to update the total count in the value profile.
// NOTE(review): original listing line 836 (the function name and its first
// parameter, which the body uses as Inst) is absent from this extract.
835 static void
837  const SmallVectorImpl<InstrProfValueData> &CallTargets,
838  uint64_t Sum) {
839  // Bail out early if MaxNumPromotions is zero.
840  // This prevents allocating an array of zero length below.
841  //
842  // Note `updateIDTMetaData` is called in two places so check
843  // `MaxNumPromotions` inside it.
844  if (MaxNumPromotions == 0)
845  return;
846  uint32_t NumVals = 0;
847  // OldSum is the existing total count in the value profile data.
848  uint64_t OldSum = 0;
// Read at most MaxNumPromotions existing entries from the instruction's
// indirect-call value profile.
849  std::unique_ptr<InstrProfValueData[]> ValueData =
850  std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
851  bool Valid =
852  getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
853  ValueData.get(), NumVals, OldSum, true);
854 
// Target value -> count that will be written back to the metadata.
855  DenseMap<uint64_t, uint64_t> ValueCountMap;
856  if (Sum == 0) {
// "Mark as promoted" mode: exactly one target, carrying the magic count.
857  assert((CallTargets.size() == 1 &&
858  CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
859  "If sum is 0, assume only one element in CallTargets "
860  "with count being NOMORE_ICP_MAGICNUM");
861  // Initialize ValueCountMap with existing value profile data.
862  if (Valid) {
863  for (uint32_t I = 0; I < NumVals; I++)
864  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
865  }
866  auto Pair =
867  ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
868  // If the target already exists in value profile, decrease the total
869  // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
870  if (!Pair.second) {
871  OldSum -= Pair.first->second;
872  Pair.first->second = NOMORE_ICP_MAGICNUM;
873  }
// The total written back is the (possibly reduced) existing total.
874  Sum = OldSum;
875  } else {
876  // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
877  // counts in the value profile.
878  if (Valid) {
879  for (uint32_t I = 0; I < NumVals; I++) {
880  if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
881  ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
882  }
883  }
884 
885  for (const auto &Data : CallTargets) {
// try_emplace only inserts when the target is not in the map yet, i.e. when
// it was not carried over above as an already-promoted entry.
886  auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
887  if (Pair.second)
888  continue;
889  // The target represented by Data.Value has already been promoted.
890  // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
891  // Sum by Data.Count.
892  assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
893  Sum -= Data.Count;
894  }
895  }
896 
// Flatten the map into a vector so it can be sorted and annotated.
897  SmallVector<InstrProfValueData, 8> NewCallTargets;
898  for (const auto &ValueCount : ValueCountMap) {
899  NewCallTargets.emplace_back(
900  InstrProfValueData{ValueCount.first, ValueCount.second});
901  }
902 
// Order by descending count, then descending value, so the result does not
// depend on the map's iteration order.
903  llvm::sort(NewCallTargets,
904  [](const InstrProfValueData &L, const InstrProfValueData &R) {
905  if (L.Count != R.Count)
906  return L.Count > R.Count;
907  return L.Value > R.Value;
908  });
909 
// Pass a limit of at most MaxNumPromotions entries to the annotation call.
910  uint32_t MaxMDCount =
911  std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
// Inst -> basic block -> function -> module.
912  annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
913  NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
914 }
915 
916 /// Attempt to promote indirect call and also inline the promoted call.
917 ///
918 /// \param F Caller function.
919 /// \param Candidate ICP and inline candidate. On a successful promotion its
/// CallInstr is replaced by the promoted call.
920 /// \param SumOrigin Original sum of target counts for indirect call before
921 /// promoting given candidate.
922 /// \param Sum Prorated sum of remaining target counts for indirect call
923 /// after promoting given candidate. Decreased by the candidate's
/// CallsiteCount once the call has been promoted.
924 /// \param InlinedCallSite Output vector for new call sites exposed after
925 /// inlining.
/// \returns True only if the call was promoted and the promoted call was
/// then inlined; false in every other case.
926 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
927  Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
928  SmallVector<CallBase *, 8> *InlinedCallSite) {
929  // Bail out early if sample-loader inliner is disabled.
     // NOTE(review): listing line 930, the condition guarding the return
     // below, is missing here.
931  return false;
932 
933  // Bail out early if MaxNumPromotions is zero.
934  // This prevents allocating an array of zero length in callees below.
935  if (MaxNumPromotions == 0)
936  return false;
     // Resolve the callee's profile name to a function through SymbolMap.
937  auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
938  auto R = SymbolMap.find(CalleeFunctionName);
939  if (R == SymbolMap.end() || !R->getValue())
940  return false;
941 
942  auto &CI = *Candidate.CallInstr;
943  if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
944  return false;
945 
     // Default text for the debug message below; isLegalToPromote receives a
     // pointer to it and can replace it.
946  const char *Reason = "Callee function not available";
947  // R->getValue() != &F is to prevent promoting a recursive call.
948  // If it is a recursive call, we do not inline it as it could bloat
949  // the code exponentially. There are better ways to handle this, e.g.
950  // clone the caller first, and inline the cloned caller if it is
951  // recursive. As llvm does not inline recursive calls, we will
952  // simply ignore it instead of handling it explicitly.
953  if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
954  R->getValue()->hasFnAttribute("use-sample-profile") &&
955  R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
956  // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
957  // in the value profile metadata so the target won't be promoted again.
958  SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
959  Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
960  updateIDTMetaData(CI, SortedCallTargets, 0);
961 
962  auto *DI = &pgo::promoteIndirectCall(
963  CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
     // NOTE(review): DI is the address of the object promoteIndirectCall
     // returns, so this check looks like it always passes - confirm.
964  if (DI) {
965  Sum -= Candidate.CallsiteCount;
966  // Do not prorate the indirect callsite distribution since the original
967  // distribution will be used to scale down non-promoted profile target
968  // counts later. By doing this we lose track of the real callsite count
969  // for the leftover indirect callsite as a trade off for accurate call
970  // target counts.
971  // TODO: Ideally we would have two separate factors, one for call site
972  // counts and one is used to prorate call target counts.
973  // Do not update the promoted direct callsite distribution at this
974  // point since the original distribution combined with the callee profile
975  // will be used to prorate callsites from the callee if inlined. Once not
976  // inlined, the direct callsite distribution should be prorated so that
977  // it will reflect the real callsite counts.
978  Candidate.CallInstr = DI;
979  if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
980  bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
981  if (!Inlined) {
982  // Prorate the direct callsite distribution so that it reflects real
983  // callsite counts.
     // NOTE(review): listing line 984, the start of the call that takes
     // the two arguments below, is missing here.
985  *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
986  }
987  return Inlined;
988  }
989  }
990  } else {
991  LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
992  << Candidate.CalleeSamples->getFuncName() << " because "
993  << Reason << "\n");
994  }
995  return false;
996 }
997 
/// Decide whether a call site that was not found hot should still be inlined.
///
/// Returns false when ProfileSizeInline is off or when `Callee` is null.
/// Otherwise the decision follows getInlineCost: a "never" cost gives false,
/// an "always" cost gives true, and any other cost is accepted when it does
/// not exceed SampleColdCallSiteThreshold.
998 bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
999  if (!ProfileSizeInline)
1000  return false;
1001 
      // NOTE(review): listing line 1002, which declares `Callee`, is missing
      // here.
1003  if (Callee == nullptr)
1004  return false;
1005 
1006  InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
1007  GetAC, GetTLI);
1008 
1009  if (Cost.isNever())
1010  return false;
1011 
1012  if (Cost.isAlways())
1013  return true;
1014 
1015  return Cost.getCost() <= SampleColdCallSiteThreshold;
1016 }
1017 
/// Emit one "InlineAttempt" analysis remark per candidate call site.
///
/// \param Candidates Call sites about to be considered for inlining.
/// Entries for which getCalledFunction() returns null are skipped.
/// \param F Function reported as the caller in each remark.
/// \param Hot Selects the wording of the remark: "hotness" when true,
/// "size" when false.
1018 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1019  const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1020  bool Hot) {
1021  for (auto I : Candidates) {
1022  Function *CalledFunction = I->getCalledFunction();
1023  if (CalledFunction) {
1024  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
1025  I->getDebugLoc(), I->getParent())
1026  << "previous inlining reattempted for "
1027  << (Hot ? "hotness: '" : "size: '")
1028  << ore::NV("Callee", CalledFunction) << "' into '"
1029  << ore::NV("Caller", &F) << "'");
1030  }
1031  }
1032 }
1033 
/// Collect into \p InlinedGUIDs the GUIDs of functions that are candidates
/// for inlining at \p CB but are not defined in this module.
///
/// \param CB Call site being examined; may be null.
/// \param Samples Profile to walk. May be null only when the external inline
/// advisor asks to inline \p CB; otherwise the assert below fires.
/// \param InlinedGUIDs Output set of GUIDs.
/// \param SymbolMap Maps function names to functions in this module.
/// \param Threshold Minimum sample count for a callee or call target to be
/// recorded. Dropped to 0 when the external advisor asks to inline \p CB.
1034 void SampleProfileLoader::findExternalInlineCandidate(
1035  CallBase *CB, const FunctionSamples *Samples,
1036  DenseSet<GlobalValue::GUID> &InlinedGUIDs,
1037  const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
1038 
1039  // If ExternalInlineAdvisor wants to inline an external function
1040  // make sure it's imported
1041  if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1042  // Samples may not exist for replayed function, if so
1043  // just add the direct GUID and move on
1044  if (!Samples) {
1045  InlinedGUIDs.insert(
      // NOTE(review): listing line 1046, the argument of the insert() call
      // above, is missing here.
1047  return;
1048  }
1049  // Otherwise, drop the threshold to import everything that we can
1050  Threshold = 0;
1051  }
1052 
1053  assert(Samples && "expect non-null caller profile");
1054 
1055  // For AutoFDO profile, retrieve candidate profiles by walking over
1056  // the nested inlinee profiles.
      // NOTE(review): listing line 1057, the condition opening the block
      // closed at line 1060, is missing here.
1058  Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1059  return;
1060  }
1061 
      // NOTE(review): listing line 1062 is missing; it should declare
      // `Caller`, initialized from the expression below.
1063  ContextTracker->getContextFor(Samples->getContext());
      // Visit the context trie node by node, starting at Caller, using a
      // FIFO queue.
1064  std::queue<ContextTrieNode *> CalleeList;
1065  CalleeList.push(Caller);
1066  while (!CalleeList.empty()) {
1067  ContextTrieNode *Node = CalleeList.front();
1068  CalleeList.pop();
1069  FunctionSamples *CalleeSample = Node->getFunctionSamples();
1070  // For CSSPGO profile, retrieve candidate profile by walking over the
1071  // trie built for context profile. Note that also take call targets
1072  // even if callee doesn't have a corresponding context profile.
1073  if (!CalleeSample)
1074  continue;
1075 
1076  // If pre-inliner decision is used, honor that for importing as well.
1077  bool PreInline =
      // NOTE(review): listing lines 1078-1079, the initializer of PreInline,
      // are missing here.
1080  if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
1081  continue;
1082 
1083  StringRef Name = CalleeSample->getFuncName();
      // NOTE(review): listing line 1084 is missing; it should declare `Func`,
      // presumably the SymbolMap entry for Name.
1085  // Add to the import list only when it's defined out of module.
1086  if (!Func || Func->isDeclaration())
1087  InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
1088 
1089  // Import hot CallTargets, which may not be available in IR because full
1090  // profile annotation cannot be done until backend compilation in ThinLTO.
1091  for (const auto &BS : CalleeSample->getBodySamples())
1092  for (const auto &TS : BS.second.getCallTargets())
1093  if (TS.getValue() > Threshold) {
1094  StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
1095  const Function *Callee = SymbolMap.lookup(CalleeName);
1096  if (!Callee || Callee->isDeclaration())
1097  InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
1098  }
1099 
1100  // Import hot child context profile associated with callees. Note that this
1101  // may have some overlap with the call target loop above, but doing this
1102  // based on child context profile again effectively allows us to use the
1103  // max of entry count and call target count to determine importing.
1104  for (auto &Child : Node->getAllChildContext()) {
1105  ContextTrieNode *CalleeNode = &Child.second;
1106  CalleeList.push(CalleeNode);
1107  }
1108  }
1109 }
1110 
1111 /// Iteratively inline hot callsites of a function.
1112 ///
1113 /// Iteratively traverse all callsites of the function \p F, so as to
1114 /// find out callsites with corresponding inline instances.
1115 ///
1116 /// For such callsites,
1117 /// - If it is hot enough, inline the callsites and adds callsites of the callee
1118 /// into the caller. If the call is an indirect call, first promote
1119 /// it to direct call. Each indirect call is limited with a single target.
1120 ///
1121 /// - If a callsite is not inlined, merge its profile to the outline
1122 /// version (if --sample-profile-merge-inlinee is true), or scale the
1123 /// counters of standalone function based on the profile of inlined
1124 /// instances (if --sample-profile-merge-inlinee is false).
1125 ///
1126 /// Later passes may consume the updated profiles.
1127 ///
1128 /// \param F function to perform iterative inlining.
1129 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1130 /// inlined in the profiled binary.
1131 ///
1132 /// \returns True if any call site was inlined.
1133 bool SampleProfileLoader::inlineHotFunctions(
1134  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1135  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1136  // Profile symbol list is ignored when profile-sample-accurate is on.
1137  assert((!ProfAccForSymsInList ||
      // NOTE(review): listing line 1138, the start of the second operand of
      // the `||` above, is missing here.
1139  !F.hasFnAttribute("profile-sample-accurate"))) &&
1140  "ProfAccForSymsInList should be false when profile-sample-accurate "
1141  "is enabled");
1142 
      // Call sites that have a callee profile and have not been inlined so
      // far; entries are erased when their call site gets inlined.
1143  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1144  bool Changed = false;
1145  bool LocalChanged = true;
      // Repeat until a full pass over F inlines nothing.
1146  while (LocalChanged) {
1147  LocalChanged = false;
      // NOTE(review): listing line 1148 is missing; it should declare `CIS`,
      // the candidate list filled and consumed below.
1149  for (auto &BB : F) {
      // Hot is tracked per basic block: one hot call site in the block makes
      // every candidate of that block eligible.
1150  bool Hot = false;
1151  SmallVector<CallBase *, 10> AllCandidates;
1152  SmallVector<CallBase *, 10> ColdCandidates;
1153  for (auto &I : BB.getInstList()) {
1154  const FunctionSamples *FS = nullptr;
1155  if (auto *CB = dyn_cast<CallBase>(&I)) {
1156  if (!isa<IntrinsicInst>(I)) {
1157  if ((FS = findCalleeFunctionSamples(*CB))) {
1158  assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1159  "GUIDToFuncNameMap has to be populated");
1160  AllCandidates.push_back(CB);
1161  if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
1162  LocalNotInlinedCallSites.try_emplace(CB, FS);
1163  if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1164  Hot = true;
1165  else if (shouldInlineColdCallee(*CB))
1166  ColdCandidates.push_back(CB);
1167  } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1168  AllCandidates.push_back(CB);
1169  }
1170  }
1171  }
1172  }
      // Queue this block's candidates at the front of CIS and report them.
1173  if (Hot || ExternalInlineAdvisor) {
1174  CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1175  emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1176  } else {
1177  CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1178  emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1179  }
1180  }
1181  for (CallBase *I : CIS) {
1182  Function *CalledFunction = I->getCalledFunction();
1183  InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1184  0 /* dummy count */,
1185  1.0 /* dummy distribution factor */};
1186  // Do not inline recursive calls.
1187  if (CalledFunction == &F)
1188  continue;
1189  if (I->isIndirectCall()) {
      // NOTE(review): Sum is not initialized here; presumably
      // findIndirectCallFunctionSamples sets it - confirm.
1190  uint64_t Sum;
1191  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1192  uint64_t SumOrigin = Sum;
      // In the ThinLTO pre-link phase only record import candidates; no
      // promotion or inlining happens for this target.
1193  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1194  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1195  PSI->getOrCompHotCountThreshold());
1196  continue;
1197  }
1198  if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1199  continue;
1200 
1201  Candidate = {I, FS, FS->getEntrySamples(), 1.0};
1202  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1203  LocalNotInlinedCallSites.erase(I);
1204  LocalChanged = true;
1205  }
1206  }
1207  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1208  !CalledFunction->isDeclaration()) {
1209  if (tryInlineCandidate(Candidate)) {
1210  LocalNotInlinedCallSites.erase(I);
1211  LocalChanged = true;
1212  }
1213  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1214  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1215  InlinedGUIDs, SymbolMap,
1216  PSI->getOrCompHotCountThreshold());
1217  }
1218  }
1219  Changed |= LocalChanged;
1220  }
1221 
1222  // For CS profile, profile for not inlined context will be merged when
1223  // base profile is being retrieved.
      // NOTE(review): listing line 1224, the condition guarding the call
      // below, is missing here.
1225  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1226  return Changed;
1227 }
1228 
/// Try to inline the direct call held in \p Candidate.
///
/// \param Candidate Call site to inline; its CallInstr must have a known
/// callee (asserted below).
/// \param InlinedCallSites Optional output. When non-null and the inlining
/// succeeds, it is cleared and filled with the call sites exposed by it.
/// \returns True if InlineFunction succeeded. False if the cost returned by
/// shouldInlineCandidate rejects the inlining or InlineFunction fails.
1229 bool SampleProfileLoader::tryInlineCandidate(
1230  InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1231  // Do not attempt to inline a candidate if
1232  // --disable-sample-loader-inlining is true.
      // NOTE(review): listing line 1233, the condition guarding the return
      // below, is missing here.
1234  return false;
1235 
1236  CallBase &CB = *Candidate.CallInstr;
1237  Function *CalledFunction = CB.getCalledFunction();
1238  assert(CalledFunction && "Expect a callee with definition");
      // Capture the location and parent block now; they are used for remarks
      // after the call instruction has been erased by the inlining.
1239  DebugLoc DLoc = CB.getDebugLoc();
1240  BasicBlock *BB = CB.getParent();
1241 
1242  InlineCost Cost = shouldInlineCandidate(Candidate);
1243  if (Cost.isNever()) {
1244  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
1245  << "incompatible inlining");
1246  return false;
1247  }
1248 
      // A cost that converts to false is rejected without a remark.
1249  if (!Cost)
1250  return false;
1251 
1252  InlineFunctionInfo IFI(nullptr, GetAC);
1253  IFI.UpdateProfile = false;
1254  if (!InlineFunction(CB, IFI).isSuccess())
1255  return false;
1256 
1257  // Merge the attributes based on the inlining.
      // NOTE(review): listing line 1258, the start of the call whose last
      // argument follows, is missing here.
1259  *CalledFunction);
1260 
1261  // The call to InlineFunction erases I, so we can't pass it here.
1262  emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
1263  *BB->getParent(), Cost, true, CSINLINE_DEBUG);
1264 
1265  // Now populate the list of newly exposed call sites.
1266  if (InlinedCallSites) {
1267  InlinedCallSites->clear();
1268  for (auto &I : IFI.InlinedCallSites)
1269  InlinedCallSites->push_back(I);
1270  }
1271 
      // NOTE(review): listing line 1272 is missing; it presumably guards the
      // ContextTracker call below.
1273  ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1274  ++NumCSInlined;
1275 
1276  // Prorate inlined probes for a duplicated inlining callsite which probably
1277  // has a distribution less than 100%. Samples for an inlinee should be
1278  // distributed among the copies of the original callsite based on each
1279  // callsite's distribution factor for counts accuracy. Note that an inlined
1280  // probe may come with its own distribution factor if it has been duplicated
1281  // in the inlinee body. The two factors are multiplied to reflect the
1282  // aggregation of duplication.
1283  if (Candidate.CallsiteDistribution < 1) {
1284  for (auto &I : IFI.InlinedCallSites) {
1285  if (Optional<PseudoProbe> Probe = extractProbe(*I))
      // NOTE(review): listing line 1286, the start of the statement ending
      // on the next line, is missing here.
1287  Candidate.CallsiteDistribution);
1288  }
1289  NumDuplicatedInlinesite++;
1290  }
1291 
1292  return true;
1293 }
1294 
/// Build the inline candidate description for call site \p CB.
///
/// \param NewCandidate Output; written only when the function returns true.
/// \param CB Call site to describe; must be non-null.
/// \returns False for intrinsic calls, and for calls that have no callee
/// profile unless the external inline advisor asks to inline them. True
/// otherwise.
1295 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1296  CallBase *CB) {
1297  assert(CB && "Expect non-null call instruction");
1298 
1299  if (isa<IntrinsicInst>(CB))
1300  return false;
1301 
1302  // Find the callee's profile. For indirect call, find hottest target profile.
1303  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1304  // If ExternalInlineAdvisor wants to inline this site, do so even
1305  // if Samples are not present.
1306  if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1307  return false;
1308 
      // Distribution factor of the call site: 1.0 unless a pseudo probe
      // attached to CB supplies its own factor.
1309  float Factor = 1.0;
1310  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
1311  Factor = Probe->Factor;
1312 
      // Entry samples scaled by the factor; 0 when there is no callee profile.
1313  uint64_t CallsiteCount =
1314  CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
1315  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1316  return true;
1317 }
1318 
/// Ask the external inline advisor, if one is set, about call site \p CB.
///
/// Returns a "never" cost when the advice does not recommend inlining, an
/// "always" cost when it does, and an empty value when there is no advisor
/// or the advisor returns no advice. The advice object is told which of the
/// two outcomes was taken.
// NOTE(review): the listing omits the line with this function's return type;
// the caller at listing line 1338 stores the result in Optional<InlineCost>.
1320 SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1321  std::unique_ptr<InlineAdvice> Advice = nullptr;
1322  if (ExternalInlineAdvisor) {
1323  Advice = ExternalInlineAdvisor->getAdvice(CB);
1324  if (Advice) {
1325  if (!Advice->isInliningRecommended()) {
1326  Advice->recordUnattemptedInlining();
1327  return InlineCost::getNever("not previously inlined");
1328  }
1329  Advice->recordInlining();
1330  return InlineCost::getAlways("previously inlined");
1331  }
1332  }
1333 
      // No advisor, or no advice for this call site.
1334  return {};
1335 }
1336 
/// Boolean form of getExternalInlineAdvisorCost for call site \p CB.
///
/// Returns false when that function yields no cost; otherwise returns the
/// cost converted to bool.
1337 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1338  Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1339  return Cost ? !!Cost.getValue() : false;
1340 }
1341 
/// Compute the inline cost used to accept or reject \p Candidate.
///
/// The checks run in this order: a cost from the external inline advisor is
/// returned as is; a cold call site may be rejected outright; a "never" or
/// "always" cost from getInlineCost is returned as is; a preinliner decision
/// stored in the callee's context may force "always". Otherwise the computed
/// cost is returned paired with a sample-profile threshold.
///
/// \param Candidate Direct call candidate; its callee must be known
/// (asserted below).
1342 InlineCost
1343 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1344  if (Optional<InlineCost> ReplayCost =
1345  getExternalInlineAdvisorCost(*Candidate.CallInstr))
1346  return ReplayCost.getValue();
1347  // Adjust threshold based on call site hotness, only do this for callsite
1348  // prioritized inliner because otherwise cost-benefit check is done earlier.
1349  int SampleThreshold = SampleColdCallSiteThreshold;
      // NOTE(review): listing line 1350, the condition opening the block
      // closed at line 1355, is missing here.
1351  if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1352  SampleThreshold = SampleHotCallSiteThreshold;
1353  else if (!ProfileSizeInline)
1354  return InlineCost::getNever("cold callsite");
1355  }
1356 
1357  Function *Callee = Candidate.CallInstr->getCalledFunction();
1358  assert(Callee && "Expect a definition for inline candidate of direct call");
1359 
1360  InlineParams Params = getInlineParams();
1361  // We will ignore the threshold from inline cost, so always get full cost.
1362  Params.ComputeFullInlineCost = true;
      // NOTE(review): listing line 1363 is missing here.
1364  // Checks if there is anything in the reachable portion of the callee at
1365  // this callsite that makes this inlining potentially illegal. Need to
1366  // set ComputeFullInlineCost, otherwise getInlineCost may return early
1367  // when cost exceeds threshold without checking all IRs in the callee.
1368  // The actual cost does not matter because we only check isNever() to
1369  // see if it is legal to inline the callsite.
1370  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1371  GetTTI(*Callee), GetAC, GetTLI);
1372 
1373  // Honor always inline and never inline from call analyzer
1374  if (Cost.isNever() || Cost.isAlways())
1375  return Cost;
1376 
1377  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1378  // decisions based on hotness as well as accurate function byte sizes for
1379  // given context using function/inlinee sizes from previous build. It
1380  // stores the decision in profile, and also adjust/merge context profile
1381  // aiming at better context-sensitive post-inline profile quality, assuming
1382  // all inline decision estimates are going to be honored by compiler. Here
1383  // we replay that inline decision under `sample-profile-use-preinliner`.
1384  // Note that we don't need to handle negative decision from preinliner as
1385  // context profile for not inlined calls are merged by preinliner already.
1386  if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1387  // Once two nodes are merged due to promotion, we're losing some context
1388  // so the original context-sensitive preinliner decision should be ignored
1389  // for SyntheticContext.
1390  SampleContext &Context = Candidate.CalleeSamples->getContext();
1391  if (!Context.hasState(SyntheticContext) &&
1392  Context.hasAttribute(ContextShouldBeInlined))
1393  return InlineCost::getAlways("preinliner");
1394  }
1395 
1396  // For old FDO inliner, we inline the call site as long as cost is not
1397  // "Never". The cost-benefit check is done earlier.
      // NOTE(review): listing line 1398, the condition opening the block
      // closed at line 1400, is missing here.
1399  return InlineCost::get(Cost.getCost(), INT_MAX);
1400  }
1401 
1402  // Otherwise only use the cost from call analyzer, but overwrite threshold
1403  // with Sample PGO threshold.
1404  return InlineCost::get(Cost.getCost(), SampleThreshold);
1405 }
1406 
/// Inline call sites of \p F in the order given by a candidate queue.
///
/// Every call site of \p F accepted by getInlineCandidate is pushed to the
/// queue. Candidates are popped one at a time while the queue is non-empty
/// and the instruction count of \p F stays below the size limit. Call sites
/// exposed by a successful inlining are pushed back as new candidates.
///
/// \param F Function whose call sites are processed.
/// \param InlinedGUIDs Set passed on to findExternalInlineCandidate, which
/// is called in the ThinLTO pre-link phase.
/// \returns True if any call site was inlined.
1407 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1408  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1409  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1410  // Profile symbol list is ignored when profile-sample-accurate is on.
1411  assert((!ProfAccForSymsInList ||
      // NOTE(review): listing line 1412, the start of the second operand of
      // the `||` above, is missing here.
1413  !F.hasFnAttribute("profile-sample-accurate"))) &&
1414  "ProfAccForSymsInList should be false when profile-sample-accurate "
1415  "is enabled");
1416 
1417  // Populating worklist with initial call sites from root inliner, along
1418  // with call site weights.
1419  CandidateQueue CQueue;
1420  InlineCandidate NewCandidate;
1421  for (auto &BB : F) {
1422  for (auto &I : BB.getInstList()) {
1423  auto *CB = dyn_cast<CallBase>(&I);
1424  if (!CB)
1425  continue;
1426  if (getInlineCandidate(&NewCandidate, CB))
1427  CQueue.push(NewCandidate);
1428  }
1429  }
1430 
1431  // Cap the size growth from profile guided inlining. This is needed even
1432  // though cost of each inline candidate already accounts for callee size,
1433  // because with top-down inlining, we can grow inliner size significantly
1434  // with large number of smaller inlinees each pass the cost check.
      // NOTE(review): listing line 1435, the start of the assert whose
      // message follows, is missing here.
1436  "Max inline size limit should not be smaller than min inline size "
1437  "limit.");
1438  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
      // NOTE(review): listing lines 1439-1440 and 1442 are missing. Judging
      // by the counters updated after the loop they adjust SizeLimit against
      // ProfileInlineLimitMin/Max, and line 1442 is the body of the `if`
      // below - confirm.
1441  if (ExternalInlineAdvisor)
1443 
      // Call sites whose inlining was attempted and failed; filled only when
      // there is no ContextTracker.
1444  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1445 
1446  // Perform iterative BFS call site prioritized inlining
1447  bool Changed = false;
1448  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1449  InlineCandidate Candidate = CQueue.top();
1450  CQueue.pop();
1451  CallBase *I = Candidate.CallInstr;
1452  Function *CalledFunction = I->getCalledFunction();
1453 
      // Do not inline recursive calls.
1454  if (CalledFunction == &F)
1455  continue;
1456  if (I->isIndirectCall()) {
1457  uint64_t Sum = 0;
1458  auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
      // SumOrigin keeps the unscaled total; Sum is scaled by the call site's
      // distribution factor.
1459  uint64_t SumOrigin = Sum;
1460  Sum *= Candidate.CallsiteDistribution;
      // Number of targets promoted and inlined so far for this call site.
1461  unsigned ICPCount = 0;
1462  for (const auto *FS : CalleeSamples) {
1463  // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1464  if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1465  findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
1466  PSI->getOrCompHotCountThreshold());
1467  continue;
1468  }
1469  uint64_t EntryCountDistributed =
1470  FS->getEntrySamples() * Candidate.CallsiteDistribution;
1471  // In addition to regular inline cost check, we also need to make sure
1472  // ICP isn't introducing excessive speculative checks even if individual
1473  // target looks beneficial to promote and inline. That means we should
1474  // only do ICP when there's a small number dominant targets.
1475  if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1476  EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1477  break;
1478  // TODO: Fix CallAnalyzer to handle all indirect calls.
1479  // For indirect call, we don't run CallAnalyzer to get InlineCost
1480  // before actual inlining. This is because we could see two different
1481  // types from the same definition, which makes CallAnalyzer choke as
1482  // it's expecting matching parameter type on both caller and callee
1483  // side. See example from PR18962 for the triggering cases (the bug was
1484  // fixed, but we generate different types).
1485  if (!PSI->isHotCount(EntryCountDistributed))
1486  break;
1487  SmallVector<CallBase *, 8> InlinedCallSites;
1488  // Attach function profile for promoted indirect callee, and update
1489  // call site count for the promoted inline candidate too.
1490  Candidate = {I, FS, EntryCountDistributed,
1491  Candidate.CallsiteDistribution};
1492  if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1493  &InlinedCallSites)) {
      // Newly exposed call sites become candidates themselves.
1494  for (auto *CB : InlinedCallSites) {
1495  if (getInlineCandidate(&NewCandidate, CB))
1496  CQueue.emplace(NewCandidate);
1497  }
1498  ICPCount++;
1499  Changed = true;
1500  } else if (!ContextTracker) {
1501  LocalNotInlinedCallSites.try_emplace(I, FS);
1502  }
1503  }
1504  } else if (CalledFunction && CalledFunction->getSubprogram() &&
1505  !CalledFunction->isDeclaration()) {
1506  SmallVector<CallBase *, 8> InlinedCallSites;
1507  if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1508  for (auto *CB : InlinedCallSites) {
1509  if (getInlineCandidate(&NewCandidate, CB))
1510  CQueue.emplace(NewCandidate);
1511  }
1512  Changed = true;
1513  } else if (!ContextTracker) {
1514  LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples);
1515  }
1516  } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1517  findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1518  InlinedGUIDs, SymbolMap,
1519  PSI->getOrCompHotCountThreshold());
1520  }
1521  }
1522 
      // Candidates left in the queue mean the loop stopped on the size limit;
      // count which kind of limit was in effect.
1523  if (!CQueue.empty()) {
1524  if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1525  ++NumCSInlinedHitMaxLimit;
1526  else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1527  ++NumCSInlinedHitMinLimit;
1528  else
1529  ++NumCSInlinedHitGrowthLimit;
1530  }
1531 
1532  // For CS profile, profile for not inlined context will be merged when
1533  // base profile is being retrieved.
      // NOTE(review): listing line 1534, the condition guarding the call
      // below, is missing here.
1535  promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1536  return Changed;
1537 }
1538 
/// Account for call sites of \p F that had an inlinee profile but were not
/// inlined.
///
/// For each such call site whose callee is defined in this module, emit a
/// "NotInline" remark and bump NumCSNotInlined. Then either merge the
/// inlinee profile into the callee's standalone profile (when
/// ProfileMergeInlinee is set) or add its entry samples to the callee's
/// record in notInlinedCallInfo.
///
/// \param F Caller reported in the remarks.
1539 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
      // NOTE(review): listing line 1540 is missing; it should declare the
      // `NonInlinedCallSites` parameter iterated below.
1541  const Function &F) {
1542  // Accumulate not inlined callsite information into notInlinedSamples
1543  for (const auto &Pair : NonInlinedCallSites) {
1544  CallBase *I = Pair.getFirst();
1545  Function *Callee = I->getCalledFunction();
1546  if (!Callee || Callee->isDeclaration())
1547  continue;
1548 
1549  ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
1550  I->getDebugLoc(), I->getParent())
1551  << "previous inlining not repeated: '"
1552  << ore::NV("Callee", Callee) << "' into '"
1553  << ore::NV("Caller", &F) << "'");
1554 
1555  ++NumCSNotInlined;
1556  const FunctionSamples *FS = Pair.getSecond();
      // A profile with no samples at all has nothing to contribute.
1557  if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
1558  continue;
1559  }
1560 
1561  // Do not merge a context that is already duplicated into the base profile.
1562  if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1563  continue;
1564 
1565  if (ProfileMergeInlinee) {
1566  // A function call can be replicated by optimizations like callsite
1567  // splitting or jump threading and the replicates end up sharing the
1568  // sample nested callee profile instead of slicing the original
1569  // inlinee's profile. We want to do merge exactly once by filtering out
1570  // callee profiles with a non-zero head sample count.
1571  if (FS->getHeadSamples() == 0) {
1572  // Use entry samples as head samples during the merge, as inlinees
1573  // don't have head samples.
1574  const_cast<FunctionSamples *>(FS)->addHeadSamples(
1575  FS->getEntrySamples());
1576 
1577  // Note that we have to do the merge right after processing function.
1578  // This allows OutlineFS's profile to be used for annotation during
1579  // top-down processing of functions' annotation.
1580  FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
1581  OutlineFS->merge(*FS, 1);
1582  // Set outlined profile to be synthetic to not bias the inliner.
1583  OutlineFS->SetContextSynthetic();
1584  }
1585  } else {
      // Without merging, only accumulate the entry count per callee.
1586  auto pair =
1587  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1588  pair.first->second.entryCount += FS->getEntrySamples();
1589  }
1590  }
1591 }
1592 
1593 /// Returns the sorted CallTargetMap \p M by count in descending order.
/// Each entry is converted to an InstrProfValueData holding the GUID of the
/// target name and its count.
// NOTE(review): listing lines 1594-1596 are missing; they should carry this
// function's signature and the declaration of the result container `R`.
1597  for (const auto &I : SampleRecord::SortCallTargets(M)) {
1598  R.emplace_back(
1599  InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
1600  }
1601  return R;
1602 }
1603 
1604 // Generate MD_prof metadata for every branch instruction using the
1605 // edge weights computed during propagation.
1606 void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1607  // Generate MD_prof metadata for every branch instruction using the
1608  // edge weights computed during propagation.
1609  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1610  LLVMContext &Ctx = F.getContext();
1611  MDBuilder MDB(Ctx);
1612  for (auto &BI : F) {
1613  BasicBlock *BB = &BI;
1614 
1615  if (BlockWeights[BB]) {
1616  for (auto &I : BB->getInstList()) {
1617  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1618  continue;
1619  if (!cast<CallBase>(I).getCalledFunction()) {
1620  const DebugLoc &DLoc = I.getDebugLoc();
1621  if (!DLoc)
1622  continue;
1623  const DILocation *DIL = DLoc;
1624  const FunctionSamples *FS = findFunctionSamples(I);
1625  if (!FS)
1626  continue;
1628  auto T = FS->findCallTargetMapAt(CallSite);
1629  if (!T || T.get().empty())
1630  continue;
1632  // Prorate the callsite counts based on the pre-ICP distribution
1633  // factor to reflect what is already done to the callsite before
1634  // ICP, such as callsite cloning.
1635  if (Optional<PseudoProbe> Probe = extractProbe(I)) {
1636  if (Probe->Factor < 1)
1637  T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1638  }
1639  }
1640  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1642  uint64_t Sum = 0;
1643  for (const auto &C : T.get())
1644  Sum += C.second;
1645  // With CSSPGO all indirect call targets are counted towards the
1646  // original indirect call site in the profile, including both
1647  // inlined and non-inlined targets.
1649  if (const FunctionSamplesMap *M =
1650  FS->findFunctionSamplesMapAt(CallSite)) {
1651  for (const auto &NameFS : *M)
1652  Sum += NameFS.second.getEntrySamples();
1653  }
1654  }
1655  if (Sum)
1656  updateIDTMetaData(I, SortedCallTargets, Sum);
1657  else if (OverwriteExistingWeights)
1658  I.setMetadata(LLVMContext::MD_prof, nullptr);
1659  } else if (!isa<IntrinsicInst>(&I)) {
1660  I.setMetadata(LLVMContext::MD_prof,
1661  MDB.createBranchWeights(
1662  {static_cast<uint32_t>(BlockWeights[BB])}));
1663  }
1664  }
1666  // Set profile metadata (possibly annotated by LTO prelink) to zero or
1667  // clear it for cold code.
1668  for (auto &I : BB->getInstList()) {
1669  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1670  if (cast<CallBase>(I).isIndirectCall())
1671  I.setMetadata(LLVMContext::MD_prof, nullptr);
1672  else
1673  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1674  }
1675  }
1676  }
1677 
1678  Instruction *TI = BB->getTerminator();
1679  if (TI->getNumSuccessors() == 1)
1680  continue;
1681  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1682  !isa<IndirectBrInst>(TI))
1683  continue;
1684 
1685  DebugLoc BranchLoc = TI->getDebugLoc();
1686  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1687  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1688  : Twine("<UNKNOWN LOCATION>"))
1689  << ".\n");
1690  SmallVector<uint32_t, 4> Weights;
1691  uint32_t MaxWeight = 0;
1692  Instruction *MaxDestInst;
1693  // Since profi treats multiple edges (multiway branches) as a single edge,
1694  // we need to distribute the computed weight among the branches. We do
1695  // this by evenly splitting the edge weight among destinations.
1696  DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1697  std::vector<uint64_t> EdgeIndex;
1698  if (SampleProfileUseProfi) {
1699  EdgeIndex.resize(TI->getNumSuccessors());
1700  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1701  const BasicBlock *Succ = TI->getSuccessor(I);
1702  EdgeIndex[I] = EdgeMultiplicity[Succ];
1703  EdgeMultiplicity[Succ]++;
1704  }
1705  }
1706  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1707  BasicBlock *Succ = TI->getSuccessor(I);
1708  Edge E = std::make_pair(BB, Succ);
1709  uint64_t Weight = EdgeWeights[E];
1710  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1711  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1712  // if needed. Sample counts in profiles are 64-bit unsigned values,
1713  // but internally branch weights are expressed as 32-bit values.
1714  if (Weight > std::numeric_limits<uint32_t>::max()) {
1715  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1717  }
1718  if (!SampleProfileUseProfi) {
1719  // Weight is added by one to avoid propagation errors introduced by
1720  // 0 weights.
1721  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1722  } else {
1723  // Profi creates proper weights that do not require "+1" adjustments but
1724  // we evenly split the weight among branches with the same destination.
1725  uint64_t W = Weight / EdgeMultiplicity[Succ];
1726  // Rounding up, if needed, so that first branches are hotter.
1727  if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1728  W++;
1729  Weights.push_back(static_cast<uint32_t>(W));
1730  }
1731  if (Weight != 0) {
1732  if (Weight > MaxWeight) {
1733  MaxWeight = Weight;
1734  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1735  }
1736  }
1737  }
1738 
1739  // FIXME: Re-enable for sample profiling after investigating why the sum
1740  // of branch weights can be 0
1741  //
1742  // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1743 
1744  uint64_t TempWeight;
1745  // Only set weights if there is at least one non-zero weight.
1746  // In any other case, let the analyzer set weights.
1747  // Do not set weights if the weights are present unless under
1748  // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1749  // twice. If the first annotation already set the weights, the second pass
1750  // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1751  // weight should have their existing metadata (possibly annotated by LTO
1752  // prelink) cleared.
1753  if (MaxWeight > 0 &&
1754  (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1755  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1756  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1757  ORE->emit([&]() {
1758  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1759  << "most popular destination for conditional branches at "
1760  << ore::NV("CondBranchesLoc", BranchLoc);
1761  });
1762  } else {
1764  TI->setMetadata(LLVMContext::MD_prof, nullptr);
1765  LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1766  } else {
1767  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1768  }
1769  }
1770  }
1771 }
1772 
1773 /// Once all the branch weights are computed, we emit the MD_prof
1774 /// metadata on BB using the computed values for each of its branches.
1775 ///
1776 /// Before annotating, the function is validated (probe-based profile check
1777 /// or a non-zero function location), hot call sites are inlined, and block
1778 /// and edge weights are computed and propagated.
1779 ///
1780 /// NOTE(review): source lines 1782 and 1800 (the conditions opening the two
1781 /// if/else pairs below) are missing from this listing.
1782 ///
1783 /// \param F The function to query.
1784 ///
1785 /// \returns true if \p F was modified. Returns false, otherwise.
1779 bool SampleProfileLoader::emitAnnotations(Function &F) {
1780  bool Changed = false;
1781 
// Probe-based path: bail out on a profile that ProbeManager rejects for F.
1783  if (!ProbeManager->profileIsValid(F, *Samples)) {
1784  LLVM_DEBUG(
1785  dbgs() << "Profile is invalid due to CFG mismatch for Function "
1786  << F.getName());
1787  ++NumMismatchedProfile;
1788  return false;
1789  }
1790  ++NumMatchedProfile;
1791  } else {
// Otherwise: a function location of 0 means there is nothing to annotate.
1792  if (getFunctionLoc(F) == 0)
1793  return false;
1794 
1795  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1796  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1797  }
1798 
// InlinedGUIDs is filled by the inliner and handed to weight propagation.
1799  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1801  Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1802  else
1803  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1804 
1805  Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1806 
// Metadata is only generated when inlining or propagation changed something.
1807  if (Changed)
1808  generateMDProfMetadata(F);
1809 
1810  emitCoverageRemarks(F);
1811  return Changed;
1812 }
1813 
1815 
// Legacy pass registration for "sample-profile".
// NOTE(review): source lines 1818-1821 and 1823 are missing from this
// listing, so the macro invocations below appear truncated.
1816 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1817  "Sample Profile loader", false, false)
1822 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1824 
/// Build the profiled call graph used to order functions for processing.
///
/// The graph is constructed either from ContextTracker or from the reader's
/// profiles, and every defined function in \p CG that carries the
/// "use-sample-profile" attribute is then added to it by canonical name.
///
/// NOTE(review): the condition selecting between the two constructors
/// (source line 1828) is missing from this listing.
///
/// \param CG The static call graph whose functions are added as nodes.
/// \returns the newly created ProfiledCallGraph.
1825 std::unique_ptr<ProfiledCallGraph>
1826 SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
1827  std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1829  ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1830  else
1831  ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1832 
1833  // Add all functions into the profiled call graph even if they are not in
1834  // the profile. This makes sure functions missing from the profile still
1835  // get a chance to be processed.
1836  for (auto &Node : CG) {
1837  const auto *F = Node.first;
1838  if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
1839  continue;
1840  ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
1841  }
1842 
1843  return ProfiledCG;
1844 }
1845 
/// Compute the order in which the functions of \p M are processed.
///
/// Without top-down loading or without a call graph, the result is simply
/// every defined function with the "use-sample-profile" attribute in module
/// order. Otherwise functions are collected by walking SCCs (of the profiled
/// call graph or of \p CG) and the collected list is reversed before being
/// returned.
///
/// NOTE(review): source lines 1851, 1873, 1874, 1929 and 1940 are missing
/// from this listing; they hold the conditions and iterator declarations
/// that the visible code refers to.
///
/// \param M  The module being processed.
/// \param CG The static call graph; may be null.
/// \returns the functions to process, in processing order.
1846 std::vector<Function *>
1847 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
1848  std::vector<Function *> FunctionOrderList;
1849  FunctionOrderList.reserve(M.size());
1850 
1852  errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1853  "together with -sample-profile-top-down-load.\n";
1854 
1855  if (!ProfileTopDownLoad || CG == nullptr) {
1856  if (ProfileMergeInlinee) {
1857  // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1858  // because the profile for a function may be used for the profile
1859  // annotation of its outline copy before the profile merging of its
1860  // non-inlined inline instances, which is not how
1861  // ProfileMergeInlinee is supposed to work.
1862  ProfileMergeInlinee = false;
1863  }
1864 
1865  for (Function &F : M)
1866  if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1867  FunctionOrderList.push_back(&F);
1868  return FunctionOrderList;
1869  }
1870 
1871  assert(&CG->getModule() == &M);
1872 
1875  // Use profiled call edges to augment the top-down order. There are cases
1876  // that the top-down order computed based on the static call graph doesn't
1877  // reflect real execution order. For example
1878  //
1879  // 1. Incomplete static call graph due to unknown indirect call targets.
1880  // Adjusting the order by considering indirect call edges from the
1881  // profile can enable the inlining of indirect call targets by allowing
1882  // the caller to be processed before them.
1883  // 2. Mutual call edges in an SCC. The static processing order computed for
1884  // an SCC may not reflect the call contexts in the context-sensitive
1885  // profile, thus may cause potential inlining to be overlooked. The
1886  // function order in one SCC is being adjusted to a top-down order based
1887  // on the profile to favor more inlining. This is only a problem with CS
1888  // profile.
1889  // 3. Transitive indirect call edges due to inlining. When a callee function
1890  // (say B) is inlined into a caller function (say A) in LTO prelink,
1891  // every call edge originated from the callee B will be transferred to
1892  // the caller A. If any transferred edge (say A->C) is indirect, the
1893  // original profiled indirect edge B->C, even if considered, would not
1894  // enforce a top-down order from the caller A to the potential indirect
1895  // call target C in LTO postlink since the inlined callee B is gone from
1896  // the static call graph.
1897  // 4. #3 can happen even for direct call targets, due to functions defined
1898  // in header files. A header function (say A), when included into source
1899  // files, is defined multiple times but only one definition survives due
1900  // to ODR. Therefore, the LTO prelink inlining done on those dropped
1901  // definitions can be useless based on a local file scope. More
1902  // importantly, the inlinee (say B), once fully inlined to a
1903  // to-be-dropped A, will have no profile to consume when its outlined
1904  // version is compiled. This can lead to a profile-less prelink
1905  // compilation for the outlined version of B which may be called from
1906  // external modules. While this isn't easy to fix, we rely on the
1907  // postlink AutoFDO pipeline to optimize B. Since the surviving copy of
1908  // A can be inlined in its local scope in prelink, it may not exist
1909  // in the merged IR in postlink, and we'll need the profiled call edges
1910  // to enforce a top-down order for the rest of the functions.
1911  //
1912  // Considering those cases, a profiled call graph completely independent of
1913  // the static call graph is constructed based on profile data, where
1914  // function objects are not even needed to handle case #3 and case #4.
1915  //
1916  // Note that static callgraph edges are completely ignored since they
1917  // can be conflicting with profiled edges for cyclic SCCs and may result in
1918  // an SCC order incompatible with profile-defined one. Using strictly
1919  // profile order ensures a maximum inlining experience. On the other hand,
1920  // static call edges are not so important when they don't correspond to a
1921  // context in the profile.
1922 
1923  std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1924  scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1925  while (!CGI.isAtEnd()) {
1926  auto Range = *CGI;
1927  if (SortProfiledSCC) {
1928  // Sort nodes in one SCC based on callsite hotness.
// NOTE(review): SI is declared on source line 1929, which is missing here.
1930  Range = *SI;
1931  }
// Profiled nodes are mapped back to IR functions by name through SymbolMap.
1932  for (auto *Node : Range) {
1933  Function *F = SymbolMap.lookup(Node->Name);
1934  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1935  FunctionOrderList.push_back(F);
1936  }
1937  ++CGI;
1938  }
1939  } else {
// NOTE(review): the declaration of CGI for this branch (source line 1940) is
// missing from this listing; the loop below iterates CallGraphNode SCCs.
1941  while (!CGI.isAtEnd()) {
1942  for (CallGraphNode *Node : *CGI) {
1943  auto *F = Node->getFunction();
1944  if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
1945  FunctionOrderList.push_back(F);
1946  }
1947  ++CGI;
1948  }
1949  }
1950 
// The debug dump iterates in reverse so that it prints the same order the
// list has after the std::reverse below.
1951  LLVM_DEBUG({
1952  dbgs() << "Function processing order:\n";
1953  for (auto F : reverse(FunctionOrderList)) {
1954  dbgs() << F->getName() << "\n";
1955  }
1956  });
1957 
1958  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1959  return FunctionOrderList;
1960 }
1961 
/// Create the profile reader, read the profile and configure the loader.
///
/// Emits a DiagnosticInfoSampleProfile and returns false when the profile
/// cannot be opened or read, or when a probe-based profile is used on a
/// module that ProbeManager reports as not probed. Returns true otherwise.
///
/// NOTE(review): many source lines are missing from this listing (1963,
/// 1974, 1988, 1999-2002 and most of the option guards between 2009 and
/// 2037), so several assignments below appear without their conditions.
1962 bool SampleProfileLoader::doInitialization(Module &M,
// NOTE(review): the rest of the parameter list (source line 1963) is not
// visible; the body below reads a pointer named FAM.
1964  auto &Ctx = M.getContext();
1965 
1966  auto ReaderOrErr = SampleProfileReader::create(
1967  Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
1968  if (std::error_code EC = ReaderOrErr.getError()) {
1969  std::string Msg = "Could not open profile: " + EC.message();
1970  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1971  return false;
1972  }
1973  Reader = std::move(ReaderOrErr.get());
1975  // Set module before reading the profile so reader may be able to only
1976  // read the function profiles which are used by the current module.
1977  Reader->setModule(&M);
1978  if (std::error_code EC = Reader->read()) {
1979  std::string Msg = "profile reading failed: " + EC.message();
1980  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1981  return false;
1982  }
1983 
1984  PSL = Reader->getProfileSymbolList();
1985 
1986  // While profile-sample-accurate is on, ignore symbol list.
// NOTE(review): the right-hand side of this assignment (source line 1988) is
// missing from this listing.
1987  ProfAccForSymsInList =
1989  if (ProfAccForSymsInList) {
// Rebuild NamesInProfile from the reader's name table, when it has one.
1990  NamesInProfile.clear();
1991  if (auto NameTable = Reader->getNameTable())
1992  NamesInProfile.insert(NameTable->begin(), NameTable->end());
1993  CoverageTracker.setProfAccForSymsInList(true);
1994  }
1995 
// Inline replay is only set up when an analysis manager and a replay file
// are both available.
1996  if (FAM && !ProfileInlineReplayFile.empty()) {
1997  ExternalInlineAdvisor = getReplayInlineAdvisor(
1998  M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2003  /*EmitRemarks=*/false);
2004  }
2005 
2006  // Apply tweaks if context-sensitive or probe-based profile is available.
2007  if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2008  Reader->profileIsProbeBased()) {
// NOTE(review): the guards around the following assignments are on lines
// missing from this listing -- confirm against the full source.
2010  UseIterativeBFIInference = true;
2012  SampleProfileUseProfi = true;
2015  // Enable priority-based inliner and size inline by default for CSSPGO.
2017  ProfileSizeInline = true;
2020  // For CSSPGO, we also allow recursive inline to best use context profile.
2022  AllowRecursiveInline = true;
2023 
2024  if (Reader->profileIsPreInlined()) {
2026  UsePreInlinerDecision = true;
2027  }
2028 
2029  if (!Reader->profileIsCS()) {
2030  // Non-CS profile should be fine without a function size budget for the
2031  // inliner since the contexts in the profile are either all from inlining
2032  // in the previous build or pre-computed by the preinliner with a size
2033  // cap, thus they are bounded.
2034  if (!ProfileInlineLimitMin.getNumOccurrences())
2036  if (!ProfileInlineLimitMax.getNumOccurrences())
2038  }
2039  }
2040 
2041  if (Reader->profileIsCS()) {
2042  // Tracker for profiles under different context
2043  ContextTracker = std::make_unique<SampleContextTracker>(
2044  Reader->getProfiles(), &GUIDToFuncNameMap);
2045  }
2046 
2047  // Load pseudo probe descriptors for probe-based function samples.
2048  if (Reader->profileIsProbeBased()) {
2049  ProbeManager = std::make_unique<PseudoProbeManager>(M);
2050  if (!ProbeManager->moduleIsProbed(M)) {
2051  const char *Msg =
2052  "Pseudo-probe-based profile requires SampleProfileProbePass";
2053  Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2054  DS_Warning));
2055  return false;
2056  }
2057  }
2058 
2059  return true;
2060 }
2061 
// Factory body: returns a newly allocated, default-constructed
// SampleProfileLoaderLegacyPass.
// NOTE(review): the function header (source line 2062) is missing from this
// listing.
2063  return new SampleProfileLoaderLegacyPass();
2064 }
2065 
// Factory body: returns a newly allocated SampleProfileLoaderLegacyPass
// constructed from Name.
// NOTE(review): the function header (source line 2066) is missing from this
// listing, so the type and meaning of Name are not visible here.
2066  return new SampleProfileLoaderLegacyPass(Name);
2067 }
2069 
/// Run the sample profile loader over every eligible function of \p M.
///
/// Installs the profile summary on the module when none is present,
/// accumulates TotalCollectedSamples, populates SymbolMap, and then calls
/// runOnFunction on each function returned by buildFunctionOrder.
///
/// NOTE(review): source lines 2077, 2092 and 2122 are missing from this
/// listing.
///
/// \param M    The module to annotate.
/// \param AM   Forwarded unchanged to runOnFunction.
/// \param _PSI Profile summary info; stored in PSI.
/// \param CG   Forwarded to buildFunctionOrder.
/// \returns the OR of all runOnFunction results.
2070 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2071  ProfileSummaryInfo *_PSI, CallGraph *CG) {
2072  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2073 
2074  PSI = _PSI;
2075  if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
// NOTE(review): the remaining argument(s) of setProfileSummary (source line
// 2077) are missing from this listing.
2076  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2078  PSI->refresh();
2079  }
2080  // Compute the total number of samples collected in this profile.
2081  for (const auto &I : Reader->getProfiles())
2082  TotalCollectedSamples += I.second.getTotalSamples();
2083 
2084  auto Remapper = Reader->getRemapper();
2085  // Populate the symbol map.
2086  for (const auto &N_F : M.getValueSymbolTable()) {
2087  StringRef OrigName = N_F.getKey();
2088  Function *F = dyn_cast<Function>(N_F.getValue());
2089  if (F == nullptr || OrigName.empty())
2090  continue;
2091  SymbolMap[OrigName] = F;
// NOTE(review): NewName is declared on source line 2092, which is missing
// from this listing.
2093  if (OrigName != NewName && !NewName.empty()) {
2094  auto r = SymbolMap.insert(std::make_pair(NewName, F));
2095  // Failing to insert means there is already an entry in SymbolMap,
2096  // thus there are multiple functions that are mapped to the same
2097  // stripped name. In this case of name conflicting, set the value
2098  // to nullptr to avoid confusion.
2099  if (!r.second)
2100  r.first->second = nullptr;
2101  OrigName = NewName;
2102  }
2103  // Insert the remapped names into SymbolMap.
2104  if (Remapper) {
2105  if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2106  if (*MapName != OrigName && !MapName->empty())
2107  SymbolMap.insert(std::make_pair(*MapName, F));
2108  }
2109  }
2110  }
2111  assert(SymbolMap.count(StringRef()) == 0 &&
2112  "No empty StringRef should be added in SymbolMap");
2113 
2114  bool retval = false;
// Per-function state is reset before each function is processed.
2115  for (auto F : buildFunctionOrder(M, CG)) {
2116  assert(!F->isDeclaration());
2117  clearFunctionData();
2118  retval |= runOnFunction(*F, AM);
2119  }
2120 
2121  // Account for cold calls not inlined....
// NOTE(review): the condition guarding this loop (source line 2122) is
// missing from this listing.
2123  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2124  notInlinedCallInfo)
2125  updateProfileCallee(pair.first, pair.second.entryCount);
2126 
2127  return retval;
2128 }
2129 
/// Legacy pass manager entry point.
///
/// Caches the assumption-cache, TTI and TLI wrapper analyses, fetches the
/// ProfileSummaryInfo, and delegates to SampleLoader.runOnModule with a null
/// ModuleAnalysisManager and a null CallGraph.
///
/// \param M The module to annotate.
/// \returns whatever SampleLoader.runOnModule returns.
2130 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
2131  ACT = &getAnalysis<AssumptionCacheTracker>();
2132  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2133  TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2134  ProfileSummaryInfo *PSI =
2135  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2136  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
2137 }
2138 
// Body of the per-function driver: picks an initial entry count for F,
// selects the optimization remark emitter, looks up the samples for F and,
// when non-empty samples exist, returns the result of emitAnnotations(F).
// NOTE(review): the function header (source line 2139) and source lines
// 2178, 2190, 2192 and 2198 are missing from this listing.
2140  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2141  DILocation2SampleMap.clear();
2142  // By default the entry count is initialized to -1, which will be treated
2143  // conservatively by getEntryCount as the same as unknown (None). This is
2144  // to avoid newly added code to be treated as cold. If we have samples
2145  // this will be overwritten in emitAnnotations.
2146  uint64_t initialEntryCount = -1;
2147 
2148  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2149  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2150  // Initialize all the function entry counts to 0. It means all the
2151  // functions without profile will be regarded as cold.
2152  initialEntryCount = 0;
2153  // profile-sample-accurate is a user assertion which has a higher precedence
2154  // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2155  ProfAccForSymsInList = false;
2156  }
2157  CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2158 
2159  // PSL -- profile symbol list includes all the symbols in sampled binary.
2160  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2161  // old functions without samples being cold, without having to worry
2162  // about new and hot functions being mistakenly treated as cold.
2163  if (ProfAccForSymsInList) {
2164  // Initialize the entry count to 0 for functions in the list.
2165  if (PSL->contains(F.getName()))
2166  initialEntryCount = 0;
2167 
2168  // Function in the symbol list but without sample will be regarded as
2169  // cold. To minimize the potential negative performance impact it could
2170  // have, we want to be a little conservative here saying if a function
2171  // shows up in the profile, no matter as outline function, inline instance
2172  // or call targets, treat the function as not being cold. This will handle
2173  // the cases such as most callsites of a function are inlined in sampled
2174  // binary but not inlined in current build (because of source code drift,
2175  // imprecise debug information, or the callsites are all cold individually
2176  // but not cold accumulatively...), so the outline function showing up as
2177  // cold in sampled binary will actually not be cold after current build.
// NOTE(review): CanonName is declared on source line 2178, which is missing
// from this listing.
2179  if (NamesInProfile.count(CanonName))
2180  initialEntryCount = -1;
2181  }
2182 
2183  // Initialize entry count when the function has no existing entry
2184  // count value.
2185  if (!F.getEntryCount().hasValue())
2186  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
// Without an analysis manager, a locally owned remark emitter is created
// and used for this function.
2187  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2188  if (AM) {
2189  auto &FAM =
2191  .getManager();
2193  } else {
2194  OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2195  ORE = OwnedORE.get();
2196  }
2197 
// NOTE(review): the condition choosing between the two sample lookups
// (source line 2198) is missing from this listing.
2199  Samples = ContextTracker->getBaseSamplesFor(F);
2200  else
2201  Samples = Reader->getSamplesFor(F);
2202 
2203  if (Samples && !Samples->empty())
2204  return emitAnnotations(F);
2205  return false;
2206 }
2207 
// New pass manager entry point (tail of the signature and body).
// Builds a SampleProfileLoader from the configured file names, and returns
// PreservedAnalyses::all() when initialization fails or runOnModule reports
// no change, PreservedAnalyses::none() otherwise.
// NOTE(review): the first header line (source line 2208) and source lines
// 2210, 2211, 2220, 2232 and 2233 are missing from this listing; FAM, PSI
// and CG are declared on those lines.
2209  ModuleAnalysisManager &AM) {
2212 
2213  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2214  return FAM.getResult<AssumptionAnalysis>(F);
2215  };
2216  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2217  return FAM.getResult<TargetIRAnalysis>(F);
2218  };
2219  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2221  };
2222 
// Pass-provided file names are used when non-empty; otherwise the
// SampleProfileFile / SampleProfileRemappingFile values are used.
2223  SampleProfileLoader SampleLoader(
2224  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2225  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2226  : ProfileRemappingFileName,
2227  LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2228 
2229  if (!SampleLoader.doInitialization(M, &FAM))
2230  return PreservedAnalyses::all();
2231 
2234  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
2235  return PreservedAnalyses::all();
2236 
2237  return PreservedAnalyses::none();
2238 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
Instrumentation.h
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:129
llvm::sampleprof::FunctionSamples::getBodySamples
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:920
llvm::getReplayInlineAdvisor
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
Definition: ReplayInlineAdvisor.cpp:78
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:135
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2461
llvm::SampleProfileLoaderPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: SampleProfile.cpp:2208
EnableExtTspBlockPlacement
cl::opt< bool > EnableExtTspBlockPlacement
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
ProfileInlineGrowthLimit
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
llvm::sampleprof::ContextDuplicatedIntoBase
@ ContextDuplicatedIntoBase
Definition: SampleProf.h:443
ProfileInlineLimitMax
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::sampleprof::FunctionSamples::ProfileIsProbeBased
static bool ProfileIsProbeBased
Definition: SampleProf.h:1112
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:304
llvm::sampleprof::FunctionSamples::ProfileIsCS
static bool ProfileIsCS
Definition: SampleProf.h:1114
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
SCCIterator.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
T
llvm::sampleprof::SampleProfileReader::profileIsProbeBased
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
Definition: SampleProfReader.h:471
llvm::sampleprof::SampleContext::hasAttribute
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:590
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
SizeLimit
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
StringRef.h
Pass.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfile.cpp:96
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:81
llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1573
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
SampleProfileRemappingFile
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ProfileICPRelativeHotnessSkip
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::emitInlinedIntoBasedOnCost
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
Definition: InlineAdvisor.cpp:489
llvm::createSampleProfileLoaderPass
ModulePass * createSampleProfileLoaderPass()
Definition: SampleProfile.cpp:2062
ProfileInlineLimitMin
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1551
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::sampleprof::ContextShouldBeInlined
@ ContextShouldBeInlined
Definition: SampleProf.h:442
DenseMap.h
updateIDTMetaData
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
Definition: SampleProfile.cpp:836
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:117
ProfileMergeInlinee
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
llvm::Optional
Definition: APInt.h:33
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:190
llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::PseudoProbe::Factor
float Factor
Definition: PseudoProbe.h:80
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::sampleprof::FunctionSamples::findInlinedFunctions
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:981
llvm::CallSiteFormat::Format::LineDiscriminator
@ LineDiscriminator
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::sampleprof::FunctionSamples::getName
StringRef getName() const
Return the function name.
Definition: SampleProf.h:1011
llvm::sampleprof::FunctionSamplesMap
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Definition: SampleProf.h:712
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::initializeSampleProfileLoaderLegacyPassPass
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:130
llvm::sampleprof::SampleProfileReader::getRemapper
SampleProfileReaderItaniumRemapper * getRemapper()
Definition: SampleProfReader.h:497
llvm::scc_member_iterator
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:252
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
ProfileInlineReplayFallback
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
llvm::ReplayInlinerSettings::Fallback::Original
@ Original
ProfileSampleBlockAccurate
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1368
llvm::InlineParams::ComputeFullInlineCost
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:217
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::PseudoProbe::Id
uint32_t Id
Definition: PseudoProbe.h:74
llvm::sampleprof::FunctionSamples::SetContextSynthetic
void SetContextSynthetic()
Definition: SampleProf.h:817
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AllowRecursiveInline
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Allow sample loader inliner to inline recursive calls."))
Instruction.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
llvm::sampleprof::FunctionSamples::getFuncName
StringRef getFuncName() const
Return the original function name.
Definition: SampleProf.h:1014
BlockFrequencyInfoImpl.h
llvm::Instruction::extractProfTotalWeight
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1462
GlobalValue.h
DisableSampleLoaderInlining
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:241
llvm::sampleprof::SampleProfileReader::profileIsCS
bool profileIsCS() const
Whether input profile is fully context-sensitive.
Definition: SampleProfReader.h:474
SortProfiledSCC
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
llvm::msgpack::Type::Map
@ Map
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2792
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::sampleprof::SampleProfileReader::read
std::error_code read()
The interface to read sample profiles from the associated file.
Definition: SampleProfReader.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ProfileSummary::getMD
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Definition: ProfileSummary.cpp:80
Twine.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
UsePreInlinerDecision
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use the preinliner decisions stored in profile context."))
llvm::sampleprof::ProfiledCallGraph
Definition: ProfiledCallGraph.h:62
llvm::sampleprof::SyntheticContext
@ SyntheticContext
Definition: SampleProf.h:433
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:87
llvm::updateProfileCallee
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
Definition: InlineFunction.cpp:1607
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:141
llvm::orc::SymbolMap
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:113
llvm::sampleprof::FunctionSamples::getGUID
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:1138
SampleProf.h
InlineAdvisor.h
ProfileInlineReplayFormat
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfile.cpp:95
llvm::CallSiteFormat::Format::LineColumnDiscriminator
@ LineColumnDiscriminator
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
MDBuilder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ReplayInlinerSettings::Fallback::NeverInline
@ NeverInline
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::setProbeDistributionFactor
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:65
DebugLoc.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:112
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3070
SampleProfileLoaderBaseUtil.h
StringMap.h
llvm::isLegalToPromote
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Definition: CallPromotionUtils.cpp:382
llvm::ProfileSummary::PSK_Sample
@ PSK_Sample
Definition: ProfileSummary.h:47
llvm::CallSiteFormat::Format::LineColumn
@ LineColumn
llvm::sampleprof::SampleProfileReader::getNameTable
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
Definition: SampleProfReader.h:485
llvm::sampleprof::SampleContext
Definition: SampleProf.h:501
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::StringMap
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:110
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::sampleprof::FunctionSamples::empty
bool empty() const
Definition: SampleProf.h:882
ValueSymbolTable.h
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
SampleProfile.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
ReplayInlineAdvisor.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::DiagnosticInfoOptimizationBase::Argument
Used in the streaming interface as the general argument type.
Definition: DiagnosticInfo.h:427
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
ProfiledCallGraph.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
ProfileSummaryInfo.h
MaxNumPromotions
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2517
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
llvm::sampleprof::FunctionSamples::getEntrySamples
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:896
llvm::sampleprof::SampleProfileReader::getSamplesFor
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
Definition: SampleProfReader.h:395
SampleProfileFile
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:720
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:716
ErrorOr.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
Cloning.h
SampleProfReader.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
ArrayRef.h
llvm::codeview::FrameProcedureOptions::Inlined
@ Inlined
llvm::sampleprof::SampleRecord::adjustCallTargets
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:407
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::ReplayInlinerSettings::Scope::Module
@ Module
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1665
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:279
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::sampleprof::FunctionSamples::UseMD5
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1123
llvm::codeview::CompileSym2Flags::EC
@ EC
InlineCost.h
CSINLINE_DEBUG
#define CSINLINE_DEBUG
Definition: SampleProfile.cpp:97
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::sampleprof::SampleProfileReader::create
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
Definition: SampleProfReader.cpp:1787
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
SampleProfileProbe.h
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:121
llvm::sampleprof::SampleProfileReader::setSkipFlatProf
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
Definition: SampleProfReader.h:493
SampleHotCallSiteThreshold
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:291
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::CallSiteFormat::Format::Line
@ Line
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::sampleprof::SampleProfileReader::getProfileSymbolList
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
Definition: SampleProfReader.h:479
uint32_t
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
CallPromotionUtils.h
Profile
Load MIR Sample Profile
Definition: MIRSampleProfile.cpp:70
llvm::ContextTrieNode
Definition: SampleContextTracker.h:33
SampleProfileLoaderBaseImpl.h
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:655
SampleContextTracker.h
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:994
llvm::sampleprofutil
Definition: SampleProfileLoaderBaseUtil.h:33
llvm::sampleprof::SampleProfileReader::getSummary
ProfileSummary & getSummary() const
Return the profile summary.
Definition: SampleProfReader.h:463
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::sampleprof::SampleProfileReader::getProfiles
SampleProfileMap & getProfiles()
Return all the profiles.
Definition: SampleProfReader.h:438
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:243
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
profile
sample profile
Definition: SampleProfile.cpp:1822
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
std
Definition: BitVector.h:851
llvm::sampleprof::SampleProfileReader::getOrCreateSamplesFor
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
Definition: SampleProfReader.h:405
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
ProfileInlineReplayScope
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:516
Casting.h
llvm::sampleprofutil::callsiteIsHot
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Definition: SampleProfileLoaderBaseUtil.cpp:68
DiagnosticInfo.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1552
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:199
UseProfiledCallGraph
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
llvm::pdb::PDB_SymType::CallSite
@ CallSite
llvm::sampleprof::SampleProfileReader
Sample-based profile reader.
Definition: SampleProfReader.h:342
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
PseudoProbe.h
llvm::sampleprof::FunctionSamples::merge
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:941
llvm::cl::value_desc
Definition: CommandLine.h:414
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::NOMORE_ICP_MAGICNUM
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:57
llvm::sampleprof::SampleProfileReader::setModule
void setModule(const Module *Mod)
Definition: SampleProfReader.h:499
SampleColdCallSiteThreshold
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:101
llvm::sampleprof::SampleProfileReader::profileIsPreInlined
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Definition: SampleProfReader.h:477
llvm::extractProbe
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:48
ProfileAccurateForSymsInList
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
llvm::sampleprof::FunctionSamples::getContext
SampleContext & getContext() const
Definition: SampleProf.h:1118
ProfileSampleAccurate
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
llvm::pgo::promoteIndirectCall
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Definition: IndirectCallPromotion.cpp:244
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::scc_iterator::isAtEnd
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:112
CallGraph.h
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:24
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::sampleprof::FunctionSamples::getCanonicalFnName
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1022
Instructions.h
loader
sample Sample Profile loader
Definition: SampleProfile.cpp:1823
SmallVector.h
llvm::sampleprof::SampleRecord::SortCallTargets
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:398
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:367
GetSortedValueDataFromCallTargets
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
Definition: SampleProfile.cpp:1595
OverwriteExistingWeights
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
ProfileTopDownLoad
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:224
CallsitePrioritizedInline
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
ProfileInlineReplayFile
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
llvm::ReplayInlinerSettings::Scope::Function
@ Function
TargetTransformInfo.h
ProfileSizeInline
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1748
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1039
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:2015
llvm::ReplayInlinerSettings::Fallback::AlwaysInline
@ AlwaysInline
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::cl::desc
Definition: CommandLine.h:405
raw_ostream.h
llvm::X86AS::FS
@ FS
Definition: X86.h:192
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:223
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::ReplayInlinerSettings
Replay Inliner Setup.
Definition: ReplayInlineAdvisor.h:43
ProfileICPRelativeHotness
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SpecialSubKind::string
@ string
doesHistoryAllowICP
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
Definition: SampleProfile.cpp:799
llvm::sampleprof::FunctionSamples::getCallSiteIdentifier
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:223
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:792