LLVM  8.0.0svn
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SampleProfileLoader transformation. This pass
11 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
12 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
13 // profile information in the given profile.
14 //
15 // This pass generates branch weight annotations on the IR:
16 //
17 // - prof: Represents branch weights. This annotation is added to branches
18 // to indicate the weights of each edge coming out of the branch.
19 // The weight of each edge is the weight of the target block for
20 // that edge. The weight of a block B is computed as the maximum
21 // number of samples found in B.
22 //
23 //===----------------------------------------------------------------------===//
24 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/DenseSet.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
38 #include "llvm/Analysis/LoopInfo.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CFG.h"
45 #include "llvm/IR/CallSite.h"
47 #include "llvm/IR/DebugLoc.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Dominators.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/LLVMContext.h"
57 #include "llvm/IR/MDBuilder.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/PassManager.h"
61 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/ErrorOr.h"
72 #include "llvm/Transforms/IPO.h"
76 #include <algorithm>
77 #include <cassert>
78 #include <cstdint>
79 #include <functional>
80 #include <limits>
81 #include <map>
82 #include <memory>
83 #include <string>
84 #include <system_error>
85 #include <utility>
86 #include <vector>
87 
88 using namespace llvm;
89 using namespace sampleprof;
91 #define DEBUG_TYPE "sample-profile"
92 
93 // Command line option to specify the file to read samples from. This is
94 // mainly used for debugging.
96  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
97  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
98 
99 // The named file contains a set of transformations that may have been applied
100 // to the symbol names between the program from which the sample data was
101 // collected and the current program's symbols.
103  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
104  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
105 
107  "sample-profile-max-propagate-iterations", cl::init(100),
108  cl::desc("Maximum number of iterations to go through when propagating "
109  "sample block/edge weights through the CFG."));
110 
112  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
113  cl::desc("Emit a warning if less than N% of records in the input profile "
114  "are matched to the IR."));
115 
117  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
118  cl::desc("Emit a warning if less than N% of samples in the input profile "
119  "are matched to the IR."));
120 
122  "no-warn-sample-unused", cl::init(false), cl::Hidden,
123  cl::desc("Use this option to turn off/on warnings about function with "
124  "samples but without debug information to use those samples. "));
125 
126 namespace {
127 
128 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
129 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
130 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
131 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
132 using BlockEdgeMap =
134 
135 class SampleCoverageTracker {
136 public:
137  SampleCoverageTracker() = default;
138 
139  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
140  uint32_t Discriminator, uint64_t Samples);
141  unsigned computeCoverage(unsigned Used, unsigned Total) const;
142  unsigned countUsedRecords(const FunctionSamples *FS,
143  ProfileSummaryInfo *PSI) const;
144  unsigned countBodyRecords(const FunctionSamples *FS,
145  ProfileSummaryInfo *PSI) const;
146  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
147  uint64_t countBodySamples(const FunctionSamples *FS,
148  ProfileSummaryInfo *PSI) const;
149 
150  void clear() {
151  SampleCoverage.clear();
152  TotalUsedSamples = 0;
153  }
154 
155 private:
156  using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
157  using FunctionSamplesCoverageMap =
159 
160  /// Coverage map for sampling records.
161  ///
162  /// This map keeps a record of sampling records that have been matched to
163  /// an IR instruction. This is used to detect some form of staleness in
164  /// profiles (see flag -sample-profile-check-coverage).
165  ///
166  /// Each entry in the map corresponds to a FunctionSamples instance. This is
167  /// another map that counts how many times the sample record at the
168  /// given location has been used.
169  FunctionSamplesCoverageMap SampleCoverage;
170 
171  /// Number of samples used from the profile.
172  ///
173  /// When a sampling record is used for the first time, the samples from
174  /// that record are added to this accumulator. Coverage is later computed
175  /// based on the total number of samples available in this function and
176  /// its callsites.
177  ///
178  /// Note that this accumulator tracks samples used from a single function
179  /// and all the inlined callsites. Strictly, we should have a map of counters
180  /// keyed by FunctionSamples pointers, but these stats are cleared after
181  /// every function, so we just need to keep a single counter.
182  uint64_t TotalUsedSamples = 0;
183 };
184 
185 /// Sample profile pass.
186 ///
187 /// This pass reads profile data from the file specified by
188 /// -sample-profile-file and annotates every affected function with the
189 /// profile information found in that file.
190 class SampleProfileLoader {
191 public:
192  SampleProfileLoader(
193  StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
194  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
195  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
196  : GetAC(std::move(GetAssumptionCache)),
197  GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
198  RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
199 
200  bool doInitialization(Module &M);
201  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
202  ProfileSummaryInfo *_PSI);
203 
204  void dump() { Reader->dump(); }
205 
206 protected:
208  unsigned getFunctionLoc(Function &F);
209  bool emitAnnotations(Function &F);
210  ErrorOr<uint64_t> getInstWeight(const Instruction &I);
211  ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
212  const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
213  std::vector<const FunctionSamples *>
214  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
215  const FunctionSamples *findFunctionSamples(const Instruction &I) const;
216  bool inlineCallInstruction(Instruction *I);
217  bool inlineHotFunctions(Function &F,
218  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
219  void printEdgeWeight(raw_ostream &OS, Edge E);
220  void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
221  void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
222  bool computeBlockWeights(Function &F);
223  void findEquivalenceClasses(Function &F);
224  template <bool IsPostDom>
225  void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
227 
228  void propagateWeights(Function &F);
229  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
230  void buildEdges(Function &F);
231  bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
232  void computeDominanceAndLoopInfo(Function &F);
233  void clearFunctionData();
234 
235  /// Map basic blocks to their computed weights.
236  ///
237  /// The weight of a basic block is defined to be the maximum
238  /// of all the instruction weights in that block.
239  BlockWeightMap BlockWeights;
240 
241  /// Map edges to their computed weights.
242  ///
243  /// Edge weights are computed by propagating basic block weights in
244  /// SampleProfile::propagateWeights.
245  EdgeWeightMap EdgeWeights;
246 
247  /// Set of visited blocks during propagation.
249 
250  /// Set of visited edges during propagation.
251  SmallSet<Edge, 32> VisitedEdges;
252 
253  /// Equivalence classes for block weights.
254  ///
255  /// Two blocks BB1 and BB2 are in the same equivalence class if they
256  /// dominate and post-dominate each other, and they are in the same loop
257  /// nest. When this happens, the two blocks are guaranteed to execute
258  /// the same number of times.
259  EquivalenceClassMap EquivalenceClass;
260 
261  /// Map from function name to Function *. Used to find the function from
262  /// the function name. If the function name contains suffix, additional
263  /// entry is added to map from the stripped name to the function if there
264  /// is one-to-one mapping.
266 
267  /// Dominance, post-dominance and loop information.
268  std::unique_ptr<DominatorTree> DT;
269  std::unique_ptr<PostDominatorTree> PDT;
270  std::unique_ptr<LoopInfo> LI;
271 
272  std::function<AssumptionCache &(Function &)> GetAC;
273  std::function<TargetTransformInfo &(Function &)> GetTTI;
274 
275  /// Predecessors for each basic block in the CFG.
276  BlockEdgeMap Predecessors;
277 
278  /// Successors for each basic block in the CFG.
279  BlockEdgeMap Successors;
280 
281  SampleCoverageTracker CoverageTracker;
282 
283  /// Profile reader object.
284  std::unique_ptr<SampleProfileReader> Reader;
285 
286  /// Samples collected for the body of this function.
287  FunctionSamples *Samples = nullptr;
288 
289  /// Name of the profile file to load.
290  std::string Filename;
291 
292  /// Name of the profile remapping file to load.
293  std::string RemappingFilename;
294 
295  /// Flag indicating whether the profile input loaded successfully.
296  bool ProfileIsValid = false;
297 
298  /// Flag indicating if the pass is invoked in ThinLTO compile phase.
299  ///
300  /// In this phase, in annotation, we should not promote indirect calls.
301  /// Instead, we will mark GUIDs that needs to be annotated to the function.
302  bool IsThinLTOPreLink;
303 
304  /// Profile Summary Info computed from sample profile.
305  ProfileSummaryInfo *PSI = nullptr;
306 
307  /// Total number of samples collected in this profile.
308  ///
309  /// This is the sum of all the samples collected in all the functions executed
310  /// at runtime.
311  uint64_t TotalCollectedSamples = 0;
312 
313  /// Optimization Remark Emitter used to emit diagnostic remarks.
314  OptimizationRemarkEmitter *ORE = nullptr;
315 };
316 
317 class SampleProfileLoaderLegacyPass : public ModulePass {
318 public:
319  // Class identification, replacement for typeinfo
320  static char ID;
321 
322  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
323  bool IsThinLTOPreLink = false)
324  : ModulePass(ID),
325  SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
326  [&](Function &F) -> AssumptionCache & {
327  return ACT->getAssumptionCache(F);
328  },
329  [&](Function &F) -> TargetTransformInfo & {
330  return TTIWP->getTTI(F);
331  }) {
334  }
335 
336  void dump() { SampleLoader.dump(); }
337 
338  bool doInitialization(Module &M) override {
339  return SampleLoader.doInitialization(M);
340  }
341 
342  StringRef getPassName() const override { return "Sample profile pass"; }
343  bool runOnModule(Module &M) override;
344 
345  void getAnalysisUsage(AnalysisUsage &AU) const override {
349  }
350 
351 private:
352  SampleProfileLoader SampleLoader;
353  AssumptionCacheTracker *ACT = nullptr;
354  TargetTransformInfoWrapperPass *TTIWP = nullptr;
355 };
356 
357 } // end anonymous namespace
358 
359 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
360 ///
361 /// Functions that were inlined in the original binary will be represented
362 /// in the inline stack in the sample profile. If the profile shows that
363 /// the original inline decision was "good" (i.e., the callsite is executed
364 /// frequently), then we will recreate the inline decision and apply the
365 /// profile from the inlined callsite.
366 ///
367 /// To decide whether an inlined callsite is hot, we compare the callsite
368 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
369 /// regarded as hot if the count is above the cutoff value.
370 static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
371  ProfileSummaryInfo *PSI) {
372  if (!CallsiteFS)
373  return false; // The callsite was not inlined in the original binary.
374 
375  assert(PSI && "PSI is expected to be non null");
376  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
377  return PSI->isHotCount(CallsiteTotalSamples);
378 }
379 
380 /// Mark as used the sample record for the given function samples at
381 /// (LineOffset, Discriminator).
382 ///
383 /// \returns true if this is the first time we mark the given record.
384 bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
385  uint32_t LineOffset,
386  uint32_t Discriminator,
387  uint64_t Samples) {
388  LineLocation Loc(LineOffset, Discriminator);
389  unsigned &Count = SampleCoverage[FS][Loc];
390  bool FirstTime = (++Count == 1);
391  if (FirstTime)
392  TotalUsedSamples += Samples;
393  return FirstTime;
394 }
395 
396 /// Return the number of sample records that were applied from this profile.
397 ///
398 /// This count does not include records from cold inlined callsites.
399 unsigned
400 SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
401  ProfileSummaryInfo *PSI) const {
402  auto I = SampleCoverage.find(FS);
403 
404  // The size of the coverage map for FS represents the number of records
405  // that were marked used at least once.
406  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
407 
408  // If there are inlined callsites in this function, count the samples found
409  // in the respective bodies. However, do not bother counting callees with 0
410  // total samples, these are callees that were never invoked at runtime.
411  for (const auto &I : FS->getCallsiteSamples())
412  for (const auto &J : I.second) {
413  const FunctionSamples *CalleeSamples = &J.second;
414  if (callsiteIsHot(CalleeSamples, PSI))
415  Count += countUsedRecords(CalleeSamples, PSI);
416  }
417 
418  return Count;
419 }
420 
421 /// Return the number of sample records in the body of this profile.
422 ///
423 /// This count does not include records from cold inlined callsites.
424 unsigned
425 SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
426  ProfileSummaryInfo *PSI) const {
427  unsigned Count = FS->getBodySamples().size();
428 
429  // Only count records in hot callsites.
430  for (const auto &I : FS->getCallsiteSamples())
431  for (const auto &J : I.second) {
432  const FunctionSamples *CalleeSamples = &J.second;
433  if (callsiteIsHot(CalleeSamples, PSI))
434  Count += countBodyRecords(CalleeSamples, PSI);
435  }
436 
437  return Count;
438 }
439 
440 /// Return the number of samples collected in the body of this profile.
441 ///
442 /// This count does not include samples from cold inlined callsites.
443 uint64_t
444 SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
445  ProfileSummaryInfo *PSI) const {
446  uint64_t Total = 0;
447  for (const auto &I : FS->getBodySamples())
448  Total += I.second.getSamples();
449 
450  // Only count samples in hot callsites.
451  for (const auto &I : FS->getCallsiteSamples())
452  for (const auto &J : I.second) {
453  const FunctionSamples *CalleeSamples = &J.second;
454  if (callsiteIsHot(CalleeSamples, PSI))
455  Total += countBodySamples(CalleeSamples, PSI);
456  }
457 
458  return Total;
459 }
460 
461 /// Return the fraction of sample records used in this profile.
462 ///
463 /// The returned value is an unsigned integer in the range 0-100 indicating
464 /// the percentage of sample records that were used while applying this
465 /// profile to the associated function.
466 unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
467  unsigned Total) const {
468  assert(Used <= Total &&
469  "number of used records cannot exceed the total number of records");
470  return Total > 0 ? Used * 100 / Total : 100;
471 }
472 
473 /// Clear all the per-function data used to load samples and propagate weights.
474 void SampleProfileLoader::clearFunctionData() {
475  BlockWeights.clear();
476  EdgeWeights.clear();
477  VisitedBlocks.clear();
478  VisitedEdges.clear();
479  EquivalenceClass.clear();
480  DT = nullptr;
481  PDT = nullptr;
482  LI = nullptr;
483  Predecessors.clear();
484  Successors.clear();
485  CoverageTracker.clear();
486 }
487 
488 #ifndef NDEBUG
489 /// Print the weight of edge \p E on stream \p OS.
490 ///
491 /// \param OS Stream to emit the output to.
492 /// \param E Edge to print.
493 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
494  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
495  << "]: " << EdgeWeights[E] << "\n";
496 }
497 
498 /// Print the equivalence class of block \p BB on stream \p OS.
499 ///
500 /// \param OS Stream to emit the output to.
501 /// \param BB Block to print.
502 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
503  const BasicBlock *BB) {
504  const BasicBlock *Equiv = EquivalenceClass[BB];
505  OS << "equivalence[" << BB->getName()
506  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
507 }
508 
509 /// Print the weight of block \p BB on stream \p OS.
510 ///
511 /// \param OS Stream to emit the output to.
512 /// \param BB Block to print.
513 void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
514  const BasicBlock *BB) const {
515  const auto &I = BlockWeights.find(BB);
516  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
517  OS << "weight[" << BB->getName() << "]: " << W << "\n";
518 }
519 #endif
520 
521 /// Get the weight for an instruction.
522 ///
523 /// The "weight" of an instruction \p Inst is the number of samples
524 /// collected on that instruction at runtime. To retrieve it, we
525 /// need to compute the line number of \p Inst relative to the start of its
526 /// function. We use HeaderLineno to compute the offset. We then
527 /// look up the samples collected for \p Inst using BodySamples.
528 ///
529 /// \param Inst Instruction to query.
530 ///
531 /// \returns the weight of \p Inst.
532 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
533  const DebugLoc &DLoc = Inst.getDebugLoc();
534  if (!DLoc)
535  return std::error_code();
536 
537  const FunctionSamples *FS = findFunctionSamples(Inst);
538  if (!FS)
539  return std::error_code();
540 
541  // Ignore all intrinsics and branch instructions.
542  // Branch instruction usually contains debug info from sources outside of
543  // the residing basic block, thus we ignore them during annotation.
544  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
545  return std::error_code();
546 
547  // If a direct call/invoke instruction is inlined in profile
548  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
549  // it means that the inlined callsite has no sample, thus the call
550  // instruction should have 0 count.
551  if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
552  !ImmutableCallSite(&Inst).isIndirectCall() &&
553  findCalleeFunctionSamples(Inst))
554  return 0;
555 
556  const DILocation *DIL = DLoc;
557  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
558  uint32_t Discriminator = DIL->getBaseDiscriminator();
559  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
560  if (R) {
561  bool FirstMark =
562  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
563  if (FirstMark) {
564  ORE->emit([&]() {
565  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
566  Remark << "Applied " << ore::NV("NumSamples", *R);
567  Remark << " samples from profile (offset: ";
568  Remark << ore::NV("LineOffset", LineOffset);
569  if (Discriminator) {
570  Remark << ".";
571  Remark << ore::NV("Discriminator", Discriminator);
572  }
573  Remark << ")";
574  return Remark;
575  });
576  }
577  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
578  << DIL->getBaseDiscriminator() << ":" << Inst
579  << " (line offset: " << LineOffset << "."
580  << DIL->getBaseDiscriminator() << " - weight: " << R.get()
581  << ")\n");
582  }
583  return R;
584 }
585 
586 /// Compute the weight of a basic block.
587 ///
588 /// The weight of basic block \p BB is the maximum weight of all the
589 /// instructions in BB.
590 ///
591 /// \param BB The basic block to query.
592 ///
593 /// \returns the weight for \p BB.
594 ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
595  uint64_t Max = 0;
596  bool HasWeight = false;
597  for (auto &I : BB->getInstList()) {
598  const ErrorOr<uint64_t> &R = getInstWeight(I);
599  if (R) {
600  Max = std::max(Max, R.get());
601  HasWeight = true;
602  }
603  }
604  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
605 }
606 
607 /// Compute and store the weights of every basic block.
608 ///
609 /// This populates the BlockWeights map by computing
610 /// the weights of every basic block in the CFG.
611 ///
612 /// \param F The function to query.
613 bool SampleProfileLoader::computeBlockWeights(Function &F) {
614  bool Changed = false;
615  LLVM_DEBUG(dbgs() << "Block weights\n");
616  for (const auto &BB : F) {
617  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
618  if (Weight) {
619  BlockWeights[&BB] = Weight.get();
620  VisitedBlocks.insert(&BB);
621  Changed = true;
622  }
623  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
624  }
625 
626  return Changed;
627 }
628 
629 /// Get the FunctionSamples for a call instruction.
630 ///
631 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
632 /// instance in which that call instruction is calling to. It contains
633 /// all samples that resides in the inlined instance. We first find the
634 /// inlined instance in which the call instruction is from, then we
635 /// traverse its children to find the callsite with the matching
636 /// location.
637 ///
638 /// \param Inst Call/Invoke instruction to query.
639 ///
640 /// \returns The FunctionSamples pointer to the inlined instance.
641 const FunctionSamples *
642 SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
643  const DILocation *DIL = Inst.getDebugLoc();
644  if (!DIL) {
645  return nullptr;
646  }
647 
648  StringRef CalleeName;
649  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
650  if (Function *Callee = CI->getCalledFunction())
651  CalleeName = Callee->getName();
652 
653  const FunctionSamples *FS = findFunctionSamples(Inst);
654  if (FS == nullptr)
655  return nullptr;
656 
658  DIL->getBaseDiscriminator()),
659  CalleeName);
660 }
661 
662 /// Returns a vector of FunctionSamples that are the indirect call targets
663 /// of \p Inst. The vector is sorted by the total number of samples. Stores
664 /// the total call count of the indirect call in \p Sum.
665 std::vector<const FunctionSamples *>
666 SampleProfileLoader::findIndirectCallFunctionSamples(
667  const Instruction &Inst, uint64_t &Sum) const {
668  const DILocation *DIL = Inst.getDebugLoc();
669  std::vector<const FunctionSamples *> R;
670 
671  if (!DIL) {
672  return R;
673  }
674 
675  const FunctionSamples *FS = findFunctionSamples(Inst);
676  if (FS == nullptr)
677  return R;
678 
679  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
680  uint32_t Discriminator = DIL->getBaseDiscriminator();
681 
682  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
683  Sum = 0;
684  if (T)
685  for (const auto &T_C : T.get())
686  Sum += T_C.second;
689  if (M->empty())
690  return R;
691  for (const auto &NameFS : *M) {
692  Sum += NameFS.second.getEntrySamples();
693  R.push_back(&NameFS.second);
694  }
695  llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
696  if (L->getEntrySamples() != R->getEntrySamples())
697  return L->getEntrySamples() > R->getEntrySamples();
698  return FunctionSamples::getGUID(L->getName()) <
700  });
701  }
702  return R;
703 }
704 
705 /// Get the FunctionSamples for an instruction.
706 ///
707 /// The FunctionSamples of an instruction \p Inst is the inlined instance
708 /// in which that instruction is coming from. We traverse the inline stack
709 /// of that instruction, and match it with the tree nodes in the profile.
710 ///
711 /// \param Inst Instruction to query.
712 ///
713 /// \returns the FunctionSamples pointer to the inlined instance.
714 const FunctionSamples *
715 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
717  const DILocation *DIL = Inst.getDebugLoc();
718  if (!DIL)
719  return Samples;
720 
721  return Samples->findFunctionSamples(DIL);
722 }
723 
724 bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
725  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
726  CallSite CS(I);
727  Function *CalledFunction = CS.getCalledFunction();
728  assert(CalledFunction);
729  DebugLoc DLoc = I->getDebugLoc();
730  BasicBlock *BB = I->getParent();
731  InlineParams Params = getInlineParams();
732  Params.ComputeFullInlineCost = true;
733  // Checks if there is anything in the reachable portion of the callee at
734  // this callsite that makes this inlining potentially illegal. Need to
735  // set ComputeFullInlineCost, otherwise getInlineCost may return early
736  // when cost exceeds threshold without checking all IRs in the callee.
737  // The acutal cost does not matter because we only checks isNever() to
738  // see if it is legal to inline the callsite.
739  InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
740  None, nullptr, nullptr);
741  if (Cost.isNever()) {
742  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
743  << "incompatible inlining");
744  return false;
745  }
746  InlineFunctionInfo IFI(nullptr, &GetAC);
747  if (InlineFunction(CS, IFI)) {
748  // The call to InlineFunction erases I, so we can't pass it here.
749  ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
750  << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
751  << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
752  return true;
753  }
754  return false;
755 }
756 
757 /// Iteratively inline hot callsites of a function.
758 ///
759 /// Iteratively traverse all callsites of the function \p F, and find if
760 /// the corresponding inlined instance exists and is hot in profile. If
761 /// it is hot enough, inline the callsites and adds new callsites of the
762 /// callee into the caller. If the call is an indirect call, first promote
763 /// it to direct call. Each indirect call is limited with a single target.
764 ///
765 /// \param F function to perform iterative inlining.
766 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
767 /// inlined in the profiled binary.
768 ///
769 /// \returns True if there is any inline happened.
770 bool SampleProfileLoader::inlineHotFunctions(
771  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
772  DenseSet<Instruction *> PromotedInsns;
773  bool Changed = false;
774  while (true) {
775  bool LocalChanged = false;
777  for (auto &BB : F) {
778  bool Hot = false;
780  for (auto &I : BB.getInstList()) {
781  const FunctionSamples *FS = nullptr;
782  if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
783  !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
784  Candidates.push_back(&I);
785  if (callsiteIsHot(FS, PSI))
786  Hot = true;
787  }
788  }
789  if (Hot) {
790  CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
791  }
792  }
793  for (auto I : CIS) {
794  Function *CalledFunction = CallSite(I).getCalledFunction();
795  // Do not inline recursive calls.
796  if (CalledFunction == &F)
797  continue;
798  if (CallSite(I).isIndirectCall()) {
799  if (PromotedInsns.count(I))
800  continue;
801  uint64_t Sum;
802  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
803  if (IsThinLTOPreLink) {
804  FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
806  continue;
807  }
808  auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
809  // If it is a recursive call, we do not inline it as it could bloat
810  // the code exponentially. There is way to better handle this, e.g.
811  // clone the caller first, and inline the cloned caller if it is
812  // recursive. As llvm does not inline recursive calls, we will
813  // simply ignore it instead of handling it explicitly.
814  if (CalleeFunctionName == F.getName())
815  continue;
816 
817  const char *Reason = "Callee function not available";
818  auto R = SymbolMap.find(CalleeFunctionName);
819  if (R != SymbolMap.end() && R->getValue() &&
820  !R->getValue()->isDeclaration() &&
821  R->getValue()->getSubprogram() &&
822  isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
823  uint64_t C = FS->getEntrySamples();
824  Instruction *DI =
825  pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
826  Sum -= C;
827  PromotedInsns.insert(I);
828  // If profile mismatches, we should not attempt to inline DI.
829  if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
830  inlineCallInstruction(DI))
831  LocalChanged = true;
832  } else {
833  LLVM_DEBUG(dbgs()
834  << "\nFailed to promote indirect call to "
835  << CalleeFunctionName << " because " << Reason << "\n");
836  }
837  }
838  } else if (CalledFunction && CalledFunction->getSubprogram() &&
839  !CalledFunction->isDeclaration()) {
840  if (inlineCallInstruction(I))
841  LocalChanged = true;
842  } else if (IsThinLTOPreLink) {
843  findCalleeFunctionSamples(*I)->findInlinedFunctions(
844  InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
845  }
846  }
847  if (LocalChanged) {
848  Changed = true;
849  } else {
850  break;
851  }
852  }
853  return Changed;
854 }
855 
856 /// Find equivalence classes for the given block.
857 ///
858 /// This finds all the blocks that are guaranteed to execute the same
859 /// number of times as \p BB1. To do this, it traverses all the
860 /// descendants of \p BB1 in the dominator or post-dominator tree.
861 ///
862 /// A block BB2 will be in the same equivalence class as \p BB1 if
863 /// the following holds:
864 ///
865 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
866 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
867 /// dominate BB1 in the post-dominator tree.
868 ///
869 /// 2- Both BB2 and \p BB1 must be in the same loop.
870 ///
871 /// For every block BB2 that meets those two requirements, we set BB2's
872 /// equivalence class to \p BB1.
873 ///
874 /// \param BB1 Block to check.
875 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
876 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
877 /// with blocks from \p BB1's dominator tree, then
878 /// this is the post-dominator tree, and vice versa.
879 template <bool IsPostDom>
880 void SampleProfileLoader::findEquivalencesFor(
881  BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
883  const BasicBlock *EC = EquivalenceClass[BB1];
884  uint64_t Weight = BlockWeights[EC];
885  for (const auto *BB2 : Descendants) {
886  bool IsDomParent = DomTree->dominates(BB2, BB1);
887  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
888  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
889  EquivalenceClass[BB2] = EC;
890  // If BB2 is visited, then the entire EC should be marked as visited.
891  if (VisitedBlocks.count(BB2)) {
892  VisitedBlocks.insert(EC);
893  }
894 
895  // If BB2 is heavier than BB1, make BB2 have the same weight
896  // as BB1.
897  //
898  // Note that we don't worry about the opposite situation here
899  // (when BB2 is lighter than BB1). We will deal with this
900  // during the propagation phase. Right now, we just want to
901  // make sure that BB1 has the largest weight of all the
902  // members of its equivalence set.
903  Weight = std::max(Weight, BlockWeights[BB2]);
904  }
905  }
906  if (EC == &EC->getParent()->getEntryBlock()) {
907  BlockWeights[EC] = Samples->getHeadSamples() + 1;
908  } else {
909  BlockWeights[EC] = Weight;
910  }
911 }
912 
913 /// Find equivalence classes.
914 ///
915 /// Since samples may be missing from blocks, we can fill in the gaps by setting
916 /// the weights of all the blocks in the same equivalence class to the same
917 /// weight. To compute the concept of equivalence, we use dominance and loop
918 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
919 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
920 ///
921 /// \param F The function to query.
922 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
923  SmallVector<BasicBlock *, 8> DominatedBBs;
924  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
925  // Find equivalence sets based on dominance and post-dominance information.
926  for (auto &BB : F) {
927  BasicBlock *BB1 = &BB;
928 
929  // Compute BB1's equivalence class once.
930  if (EquivalenceClass.count(BB1)) {
931  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
932  continue;
933  }
934 
935  // By default, blocks are in their own equivalence class.
936  EquivalenceClass[BB1] = BB1;
937 
938  // Traverse all the blocks dominated by BB1. We are looking for
939  // every basic block BB2 such that:
940  //
941  // 1- BB1 dominates BB2.
942  // 2- BB2 post-dominates BB1.
943  // 3- BB1 and BB2 are in the same loop nest.
944  //
945  // If all those conditions hold, it means that BB2 is executed
946  // as many times as BB1, so they are placed in the same equivalence
947  // class by making BB2's equivalence class be BB1.
948  DominatedBBs.clear();
949  DT->getDescendants(BB1, DominatedBBs);
950  findEquivalencesFor(BB1, DominatedBBs, PDT.get());
951 
952  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
953  }
954 
955  // Assign weights to equivalence classes.
956  //
957  // All the basic blocks in the same equivalence class will execute
958  // the same number of times. Since we know that the head block in
959  // each equivalence class has the largest weight, assign that weight
960  // to all the blocks in that equivalence class.
961  LLVM_DEBUG(
962  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
963  for (auto &BI : F) {
964  const BasicBlock *BB = &BI;
965  const BasicBlock *EquivBB = EquivalenceClass[BB];
966  if (BB != EquivBB)
967  BlockWeights[BB] = BlockWeights[EquivBB];
968  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
969  }
970 }
971 
972 /// Visit the given edge to decide if it has a valid weight.
973 ///
974 /// If \p E has not been visited before, we copy to \p UnknownEdge
975 /// and increment the count of unknown edges.
976 ///
977 /// \param E Edge to visit.
978 /// \param NumUnknownEdges Current number of unknown edges.
979 /// \param UnknownEdge Set if E has not been visited before.
980 ///
981 /// \returns E's weight, if known. Otherwise, return 0.
982 uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
983  Edge *UnknownEdge) {
984  if (!VisitedEdges.count(E)) {
985  (*NumUnknownEdges)++;
986  *UnknownEdge = E;
987  return 0;
988  }
989 
990  return EdgeWeights[E];
991 }
992 
993 /// Propagate weights through incoming/outgoing edges.
994 ///
995 /// If the weight of a basic block is known, and there is only one edge
996 /// with an unknown weight, we can calculate the weight of that edge.
997 ///
998 /// Similarly, if all the edges have a known count, we can calculate the
999 /// count of the basic block, if needed.
1000 ///
1001 /// \param F Function to process.
1002 /// \param UpdateBlockCount Whether we should update basic block counts that
1003 /// has already been annotated.
1004 ///
1005 /// \returns True if new weights were assigned to edges or blocks.
1006 bool SampleProfileLoader::propagateThroughEdges(Function &F,
1007  bool UpdateBlockCount) {
1008  bool Changed = false;
1009  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
1010  for (const auto &BI : F) {
1011  const BasicBlock *BB = &BI;
1012  const BasicBlock *EC = EquivalenceClass[BB];
1013 
1014  // Visit all the predecessor and successor edges to determine
1015  // which ones have a weight assigned already. Note that it doesn't
1016  // matter that we only keep track of a single unknown edge. The
1017  // only case we are interested in handling is when only a single
1018  // edge is unknown (see setEdgeOrBlockWeight).
1019  for (unsigned i = 0; i < 2; i++) {
1020  uint64_t TotalWeight = 0;
1021  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1022  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1023 
1024  if (i == 0) {
1025  // First, visit all predecessor edges.
1026  NumTotalEdges = Predecessors[BB].size();
1027  for (auto *Pred : Predecessors[BB]) {
1028  Edge E = std::make_pair(Pred, BB);
1029  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1030  if (E.first == E.second)
1031  SelfReferentialEdge = E;
1032  }
1033  if (NumTotalEdges == 1) {
1034  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1035  }
1036  } else {
1037  // On the second round, visit all successor edges.
1038  NumTotalEdges = Successors[BB].size();
1039  for (auto *Succ : Successors[BB]) {
1040  Edge E = std::make_pair(BB, Succ);
1041  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1042  }
1043  if (NumTotalEdges == 1) {
1044  SingleEdge = std::make_pair(BB, Successors[BB][0]);
1045  }
1046  }
1047 
1048  // After visiting all the edges, there are three cases that we
1049  // can handle immediately:
1050  //
1051  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
1052  // In this case, we simply check that the sum of all the edges
1053  // is the same as BB's weight. If not, we change BB's weight
1054  // to match. Additionally, if BB had not been visited before,
1055  // we mark it visited.
1056  //
1057  // - Only one edge is unknown and BB has already been visited.
1058  // In this case, we can compute the weight of the edge by
1059  // subtracting the total block weight from all the known
1060  // edge weights. If the edges weight more than BB, then the
1061  // edge of the last remaining edge is set to zero.
1062  //
1063  // - There exists a self-referential edge and the weight of BB is
1064  // known. In this case, this edge can be based on BB's weight.
1065  // We add up all the other known edges and set the weight on
1066  // the self-referential edge as we did in the previous case.
1067  //
1068  // In any other case, we must continue iterating. Eventually,
1069  // all edges will get a weight, or iteration will stop when
1070  // it reaches SampleProfileMaxPropagateIterations.
1071  if (NumUnknownEdges <= 1) {
1072  uint64_t &BBWeight = BlockWeights[EC];
1073  if (NumUnknownEdges == 0) {
1074  if (!VisitedBlocks.count(EC)) {
1075  // If we already know the weight of all edges, the weight of the
1076  // basic block can be computed. It should be no larger than the sum
1077  // of all edge weights.
1078  if (TotalWeight > BBWeight) {
1079  BBWeight = TotalWeight;
1080  Changed = true;
1081  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
1082  << " known. Set weight for block: ";
1083  printBlockWeight(dbgs(), BB););
1084  }
1085  } else if (NumTotalEdges == 1 &&
1086  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1087  // If there is only one edge for the visited basic block, use the
1088  // block weight to adjust edge weight if edge weight is smaller.
1089  EdgeWeights[SingleEdge] = BlockWeights[EC];
1090  Changed = true;
1091  }
1092  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1093  // If there is a single unknown edge and the block has been
1094  // visited, then we can compute E's weight.
1095  if (BBWeight >= TotalWeight)
1096  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1097  else
1098  EdgeWeights[UnknownEdge] = 0;
1099  const BasicBlock *OtherEC;
1100  if (i == 0)
1101  OtherEC = EquivalenceClass[UnknownEdge.first];
1102  else
1103  OtherEC = EquivalenceClass[UnknownEdge.second];
1104  // Edge weights should never exceed the BB weights it connects.
1105  if (VisitedBlocks.count(OtherEC) &&
1106  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1107  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1108  VisitedEdges.insert(UnknownEdge);
1109  Changed = true;
1110  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
1111  printEdgeWeight(dbgs(), UnknownEdge));
1112  }
1113  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1114  // If a block Weights 0, all its in/out edges should weight 0.
1115  if (i == 0) {
1116  for (auto *Pred : Predecessors[BB]) {
1117  Edge E = std::make_pair(Pred, BB);
1118  EdgeWeights[E] = 0;
1119  VisitedEdges.insert(E);
1120  }
1121  } else {
1122  for (auto *Succ : Successors[BB]) {
1123  Edge E = std::make_pair(BB, Succ);
1124  EdgeWeights[E] = 0;
1125  VisitedEdges.insert(E);
1126  }
1127  }
1128  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1129  uint64_t &BBWeight = BlockWeights[BB];
1130  // We have a self-referential edge and the weight of BB is known.
1131  if (BBWeight >= TotalWeight)
1132  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1133  else
1134  EdgeWeights[SelfReferentialEdge] = 0;
1135  VisitedEdges.insert(SelfReferentialEdge);
1136  Changed = true;
1137  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
1138  printEdgeWeight(dbgs(), SelfReferentialEdge));
1139  }
1140  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1141  BlockWeights[EC] = TotalWeight;
1142  VisitedBlocks.insert(EC);
1143  Changed = true;
1144  }
1145  }
1146  }
1147 
1148  return Changed;
1149 }
1150 
1151 /// Build in/out edge lists for each basic block in the CFG.
1152 ///
1153 /// We are interested in unique edges. If a block B1 has multiple
1154 /// edges to another block B2, we only add a single B1->B2 edge.
1155 void SampleProfileLoader::buildEdges(Function &F) {
1156  for (auto &BI : F) {
1157  BasicBlock *B1 = &BI;
1158 
1159  // Add predecessors for B1.
1161  if (!Predecessors[B1].empty())
1162  llvm_unreachable("Found a stale predecessors list in a basic block.");
1163  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
1164  BasicBlock *B2 = *PI;
1165  if (Visited.insert(B2).second)
1166  Predecessors[B1].push_back(B2);
1167  }
1168 
1169  // Add successors for B1.
1170  Visited.clear();
1171  if (!Successors[B1].empty())
1172  llvm_unreachable("Found a stale successors list in a basic block.");
1173  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
1174  BasicBlock *B2 = *SI;
1175  if (Visited.insert(B2).second)
1176  Successors[B1].push_back(B2);
1177  }
1178  }
1179 }
1180 
1181 /// Returns the sorted CallTargetMap \p M by count in descending order.
1183  const SampleRecord::CallTargetMap &M) {
1185  for (auto I = M.begin(); I != M.end(); ++I)
1186  R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1187  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
1188  if (L.Count == R.Count)
1189  return L.Value > R.Value;
1190  else
1191  return L.Count > R.Count;
1192  });
1193  return R;
1194 }
1195 
1196 /// Propagate weights into edges
1197 ///
1198 /// The following rules are applied to every block BB in the CFG:
1199 ///
1200 /// - If BB has a single predecessor/successor, then the weight
1201 /// of that edge is the weight of the block.
1202 ///
1203 /// - If all incoming or outgoing edges are known except one, and the
1204 /// weight of the block is already known, the weight of the unknown
1205 /// edge will be the weight of the block minus the sum of all the known
1206 /// edges. If the sum of all the known edges is larger than BB's weight,
1207 /// we set the unknown edge weight to zero.
1208 ///
1209 /// - If there is a self-referential edge, and the weight of the block is
1210 /// known, the weight for that edge is set to the weight of the block
1211 /// minus the weight of the other incoming edges to that block (if
1212 /// known).
1213 void SampleProfileLoader::propagateWeights(Function &F) {
1214  bool Changed = true;
1215  unsigned I = 0;
1216 
1217  // If BB weight is larger than its corresponding loop's header BB weight,
1218  // use the BB weight to replace the loop header BB weight.
1219  for (auto &BI : F) {
1220  BasicBlock *BB = &BI;
1221  Loop *L = LI->getLoopFor(BB);
1222  if (!L) {
1223  continue;
1224  }
1225  BasicBlock *Header = L->getHeader();
1226  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1227  BlockWeights[Header] = BlockWeights[BB];
1228  }
1229  }
1230 
1231  // Before propagation starts, build, for each block, a list of
1232  // unique predecessors and successors. This is necessary to handle
1233  // identical edges in multiway branches. Since we visit all blocks and all
1234  // edges of the CFG, it is cleaner to build these lists once at the start
1235  // of the pass.
1236  buildEdges(F);
1237 
1238  // Propagate until we converge or we go past the iteration limit.
1239  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1240  Changed = propagateThroughEdges(F, false);
1241  }
1242 
1243  // The first propagation propagates BB counts from annotated BBs to unknown
1244  // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1245  // to propagate edge weights.
1246  VisitedEdges.clear();
1247  Changed = true;
1248  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1249  Changed = propagateThroughEdges(F, false);
1250  }
1251 
1252  // The 3rd propagation pass allows adjust annotated BB weights that are
1253  // obviously wrong.
1254  Changed = true;
1255  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1256  Changed = propagateThroughEdges(F, true);
1257  }
1258 
1259  // Generate MD_prof metadata for every branch instruction using the
1260  // edge weights computed during propagation.
1261  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1262  LLVMContext &Ctx = F.getContext();
1263  MDBuilder MDB(Ctx);
1264  for (auto &BI : F) {
1265  BasicBlock *BB = &BI;
1266 
1267  if (BlockWeights[BB]) {
1268  for (auto &I : BB->getInstList()) {
1269  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1270  continue;
1271  CallSite CS(&I);
1272  if (!CS.getCalledFunction()) {
1273  const DebugLoc &DLoc = I.getDebugLoc();
1274  if (!DLoc)
1275  continue;
1276  const DILocation *DIL = DLoc;
1277  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
1278  uint32_t Discriminator = DIL->getBaseDiscriminator();
1279 
1280  const FunctionSamples *FS = findFunctionSamples(I);
1281  if (!FS)
1282  continue;
1283  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
1284  if (!T || T.get().empty())
1285  continue;
1286  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1287  SortCallTargets(T.get());
1288  uint64_t Sum;
1289  findIndirectCallFunctionSamples(I, Sum);
1290  annotateValueSite(*I.getParent()->getParent()->getParent(), I,
1291  SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1292  SortedCallTargets.size());
1293  } else if (!dyn_cast<IntrinsicInst>(&I)) {
1294  SmallVector<uint32_t, 1> Weights;
1295  Weights.push_back(BlockWeights[BB]);
1296  I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1297  }
1298  }
1299  }
1300  Instruction *TI = BB->getTerminator();
1301  if (TI->getNumSuccessors() == 1)
1302  continue;
1303  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1304  continue;
1305 
1306  DebugLoc BranchLoc = TI->getDebugLoc();
1307  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1308  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1309  : Twine("<UNKNOWN LOCATION>"))
1310  << ".\n");
1311  SmallVector<uint32_t, 4> Weights;
1312  uint32_t MaxWeight = 0;
1313  Instruction *MaxDestInst;
1314  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1315  BasicBlock *Succ = TI->getSuccessor(I);
1316  Edge E = std::make_pair(BB, Succ);
1317  uint64_t Weight = EdgeWeights[E];
1318  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1319  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1320  // if needed. Sample counts in profiles are 64-bit unsigned values,
1321  // but internally branch weights are expressed as 32-bit values.
1322  if (Weight > std::numeric_limits<uint32_t>::max()) {
1323  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1325  }
1326  // Weight is added by one to avoid propagation errors introduced by
1327  // 0 weights.
1328  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1329  if (Weight != 0) {
1330  if (Weight > MaxWeight) {
1331  MaxWeight = Weight;
1332  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1333  }
1334  }
1335  }
1336 
1337  uint64_t TempWeight;
1338  // Only set weights if there is at least one non-zero weight.
1339  // In any other case, let the analyzer set weights.
1340  // Do not set weights if the weights are present. In ThinLTO, the profile
1341  // annotation is done twice. If the first annotation already set the
1342  // weights, the second pass does not need to set it.
1343  if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1344  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1346  MDB.createBranchWeights(Weights));
1347  ORE->emit([&]() {
1348  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1349  << "most popular destination for conditional branches at "
1350  << ore::NV("CondBranchesLoc", BranchLoc);
1351  });
1352  } else {
1353  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1354  }
1355  }
1356 }
1357 
1358 /// Get the line number for the function header.
1359 ///
1360 /// This looks up function \p F in the current compilation unit and
1361 /// retrieves the line number where the function is defined. This is
1362 /// line 0 for all the samples read from the profile file. Every line
1363 /// number is relative to this line.
1364 ///
1365 /// \param F Function object to query.
1366 ///
1367 /// \returns the line number where \p F is defined. If it returns 0,
1368 /// it means that there is no debug information available for \p F.
1369 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
1370  if (DISubprogram *S = F.getSubprogram())
1371  return S->getLine();
1372 
1373  if (NoWarnSampleUnused)
1374  return 0;
1375 
1376  // If the start of \p F is missing, emit a diagnostic to inform the user
1377  // about the missed opportunity.
1379  "No debug information found in function " + F.getName() +
1380  ": Function profile not used",
1381  DS_Warning));
1382  return 0;
1383 }
1384 
1385 void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
1386  DT.reset(new DominatorTree);
1387  DT->recalculate(F);
1388 
1389  PDT.reset(new PostDominatorTree(F));
1390 
1391  LI.reset(new LoopInfo);
1392  LI->analyze(*DT);
1393 }
1394 
1395 /// Generate branch weight metadata for all branches in \p F.
1396 ///
1397 /// Branch weights are computed out of instruction samples using a
1398 /// propagation heuristic. Propagation proceeds in 3 phases:
1399 ///
1400 /// 1- Assignment of block weights. All the basic blocks in the function
1401 /// are initial assigned the same weight as their most frequently
1402 /// executed instruction.
1403 ///
1404 /// 2- Creation of equivalence classes. Since samples may be missing from
1405 /// blocks, we can fill in the gaps by setting the weights of all the
1406 /// blocks in the same equivalence class to the same weight. To compute
1407 /// the concept of equivalence, we use dominance and loop information.
1408 /// Two blocks B1 and B2 are in the same equivalence class if B1
1409 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1410 ///
1411 /// 3- Propagation of block weights into edges. This uses a simple
1412 /// propagation heuristic. The following rules are applied to every
1413 /// block BB in the CFG:
1414 ///
1415 /// - If BB has a single predecessor/successor, then the weight
1416 /// of that edge is the weight of the block.
1417 ///
1418 /// - If all the edges are known except one, and the weight of the
1419 /// block is already known, the weight of the unknown edge will
1420 /// be the weight of the block minus the sum of all the known
1421 /// edges. If the sum of all the known edges is larger than BB's weight,
1422 /// we set the unknown edge weight to zero.
1423 ///
1424 /// - If there is a self-referential edge, and the weight of the block is
1425 /// known, the weight for that edge is set to the weight of the block
1426 /// minus the weight of the other incoming edges to that block (if
1427 /// known).
1428 ///
1429 /// Since this propagation is not guaranteed to finalize for every CFG, we
1430 /// only allow it to proceed for a limited number of iterations (controlled
1431 /// by -sample-profile-max-propagate-iterations).
1432 ///
1433 /// FIXME: Try to replace this propagation heuristic with a scheme
1434 /// that is guaranteed to finalize. A work-list approach similar to
1435 /// the standard value propagation algorithm used by SSA-CCP might
1436 /// work here.
1437 ///
1438 /// Once all the branch weights are computed, we emit the MD_prof
1439 /// metadata on BB using the computed values for each of its branches.
1440 ///
1441 /// \param F The function to query.
1442 ///
1443 /// \returns true if \p F was modified. Returns false, otherwise.
1444 bool SampleProfileLoader::emitAnnotations(Function &F) {
1445  bool Changed = false;
1446 
1447  if (getFunctionLoc(F) == 0)
1448  return false;
1449 
1450  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1451  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1452 
1453  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1454  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1455 
1456  // Compute basic block weights.
1457  Changed |= computeBlockWeights(F);
1458 
1459  if (Changed) {
1460  // Add an entry count to the function using the samples gathered at the
1461  // function entry.
1462  // Sets the GUIDs that are inlined in the profiled binary. This is used
1463  // for ThinLink to make correct liveness analysis, and also make the IR
1464  // match the profiled binary before annotation.
1465  F.setEntryCount(
1466  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1467  &InlinedGUIDs);
1468 
1469  // Compute dominance and loop info needed for propagation.
1470  computeDominanceAndLoopInfo(F);
1471 
1472  // Find equivalence classes.
1473  findEquivalenceClasses(F);
1474 
1475  // Propagate weights to all edges.
1476  propagateWeights(F);
1477  }
1478 
1479  // If coverage checking was requested, compute it now.
1481  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1482  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1483  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1484  if (Coverage < SampleProfileRecordCoverage) {
1486  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1487  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1488  Twine(Coverage) + "%) were applied",
1489  DS_Warning));
1490  }
1491  }
1492 
1494  uint64_t Used = CoverageTracker.getTotalUsedSamples();
1495  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1496  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1497  if (Coverage < SampleProfileSampleCoverage) {
1499  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1500  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1501  Twine(Coverage) + "%) were applied",
1502  DS_Warning));
1503  }
1504  }
1505  return Changed;
1506 }
1507 
1509 
1510 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1511  "Sample Profile loader", false, false)
1515 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1516  "Sample Profile loader", false, false)
1517 
1518 bool SampleProfileLoader::doInitialization(Module &M) {
1519  auto &Ctx = M.getContext();
1520  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
1521  if (std::error_code EC = ReaderOrErr.getError()) {
1522  std::string Msg = "Could not open profile: " + EC.message();
1523  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1524  return false;
1525  }
1526  Reader = std::move(ReaderOrErr.get());
1527  Reader->collectFuncsToUse(M);
1528  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1529 
1530  if (!RemappingFilename.empty()) {
1531  // Apply profile remappings to the loaded profile data if requested.
1532  // For now, we only support remapping symbols encoded using the Itanium
1533  // C++ ABI's name mangling scheme.
1535  RemappingFilename, Ctx, std::move(Reader));
1536  if (std::error_code EC = ReaderOrErr.getError()) {
1537  std::string Msg = "Could not open profile remapping file: " + EC.message();
1538  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1539  return false;
1540  }
1541  Reader = std::move(ReaderOrErr.get());
1542  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1543  }
1544  return true;
1545 }
1546 
1548  return new SampleProfileLoaderLegacyPass();
1549 }
1550 
1552  return new SampleProfileLoaderLegacyPass(Name);
1553 }
1554 
1555 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1556  ProfileSummaryInfo *_PSI) {
1558  if (!ProfileIsValid)
1559  return false;
1560 
1561  PSI = _PSI;
1562  if (M.getProfileSummary() == nullptr)
1563  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
1564 
1565  // Compute the total number of samples collected in this profile.
1566  for (const auto &I : Reader->getProfiles())
1567  TotalCollectedSamples += I.second.getTotalSamples();
1568 
1569  // Populate the symbol map.
1570  for (const auto &N_F : M.getValueSymbolTable()) {
1571  StringRef OrigName = N_F.getKey();
1572  Function *F = dyn_cast<Function>(N_F.getValue());
1573  if (F == nullptr)
1574  continue;
1575  SymbolMap[OrigName] = F;
1576  auto pos = OrigName.find('.');
1577  if (pos != StringRef::npos) {
1578  StringRef NewName = OrigName.substr(0, pos);
1579  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1580  // Failiing to insert means there is already an entry in SymbolMap,
1581  // thus there are multiple functions that are mapped to the same
1582  // stripped name. In this case of name conflicting, set the value
1583  // to nullptr to avoid confusion.
1584  if (!r.second)
1585  r.first->second = nullptr;
1586  }
1587  }
1588 
1589  bool retval = false;
1590  for (auto &F : M)
1591  if (!F.isDeclaration()) {
1592  clearFunctionData();
1593  retval |= runOnFunction(F, AM);
1594  }
1595  return retval;
1596 }
1597 
1598 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1599  ACT = &getAnalysis<AssumptionCacheTracker>();
1600  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1601  ProfileSummaryInfo *PSI =
1602  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1603  return SampleLoader.runOnModule(M, nullptr, PSI);
1604 }
1605 
1607  // Initialize the entry count to -1, which will be treated conservatively
1608  // by getEntryCount as the same as unknown (None). If we have samples this
1609  // will be overwritten in emitAnnotations.
1611  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1612  if (AM) {
1613  auto &FAM =
1615  .getManager();
1616  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1617  } else {
1618  OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
1619  ORE = OwnedORE.get();
1620  }
1621  Samples = Reader->getSamplesFor(F);
1622  if (Samples && !Samples->empty())
1623  return emitAnnotations(F);
1624  return false;
1625 }
1626 
1628  ModuleAnalysisManager &AM) {
1630  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1631 
1632  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1633  return FAM.getResult<AssumptionAnalysis>(F);
1634  };
1635  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1636  return FAM.getResult<TargetIRAnalysis>(F);
1637  };
1638 
1639  SampleProfileLoader SampleLoader(
1640  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1641  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1642  : ProfileRemappingFileName,
1643  IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1644 
1645  SampleLoader.doInitialization(M);
1646 
1648  if (!SampleLoader.runOnModule(M, &AM, PSI))
1649  return PreservedAnalyses::all();
1650 
1651  return PreservedAnalyses::none();
1652 }
const NoneType None
Definition: None.h:24
uint64_t CallInst * C
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
Definition: SampleProf.h:284
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:490
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:155
Represents either an error or a value T.
Definition: ErrorOr.h:57
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:106
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Definition: Module.h:555
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
unsigned getLine() const
Definition: DebugLoc.cpp:26
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
Definition: SampleProf.h:257
A debug info location.
Definition: DebugLoc.h:34
F(f)
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
InlineFunction - This function inlines the called function into the basic block of the caller...
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:383
static SmallVector< InstrProfValueData, 2 > SortCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
InlineFunctionInfo - This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:177
bool isHotCount(uint64_t C)
Returns true if F is a hot function.
Represents the cost of inlining a function.
Definition: InlineCost.h:65
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Representation of the samples collected for a function.
Definition: SampleProf.h:217
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:196
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Definition: BitVector.h:938
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1366
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:243
Diagnostic information for optimization analysis remarks.
static StringRef getName(Value *V)
Metadata * getProfileSummary()
Returns profile summary metadata.
Definition: Module.cpp:533
StringRef getFilename() const
BlockT * getHeader() const
Definition: LoopInfo.h:100
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
Subprogram description.
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
Definition: BasicBlock.cpp:204
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1340
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
Debug location.
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:157
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:151
Core dominator tree base class.
Definition: LoopInfo.h:61
const BasicBlock & getEntryBlock() const
Definition: Function.h:640
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1508
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:351
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:48
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:812
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
Class to represent profile counts.
Definition: Function.h:261
size_t size() const
Definition: SmallVector.h:53
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Definition: SampleProf.h:410
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:182
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:972
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
Definition: SampleProf.h:295
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:328
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:165
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:270
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:220
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:212
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void setProfileSummary(Metadata *M)
Attach profile summary metadata to this module.
Definition: Module.cpp:529
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:307
#define DEBUG_TYPE
static const size_t npos
Definition: StringRef.h:51
iterator begin()
Definition: StringMap.h:315
Represents the relative location of an instruction.
Definition: SampleProf.h:118
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
static bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
Establish a view to a call site for examination.
Definition: CallSite.h:714
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
iterator end()
Definition: DenseMap.h:84
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
std::map< std::string, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:209
StringRef getName() const
Return the function name.
Definition: SampleProf.h:407
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:92
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
Provides ErrorOr<T> smart pointer.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:107
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:321
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
Definition: SampleProf.h:354
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:332
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:298
#define LLVM_DEBUG(X)
Definition: Debug.h:123
The optimization diagnostic interface.
iterator end()
Definition: StringMap.h:318
This file provides the interface for the sampled PGO loader pass.
reference get()
Definition: ErrorOr.h:157
const BasicBlock * getParent() const
Definition: Instruction.h:67
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038