LLVM  9.0.0svn
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileLoader transformation. This pass
10 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12 // profile information in the given profile.
13 //
14 // This pass generates branch weight annotations on the IR:
15 //
16 // - prof: Represents branch weights. This annotation is added to branches
17 // to indicate the weights of each edge coming out of the branch.
18 // The weight of each edge is the weight of the target block for
19 // that edge. The weight of a block B is computed as the maximum
20 // number of samples found in B.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
37 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CFG.h"
44 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/MDBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/PassManager.h"
60 #include "llvm/Pass.h"
64 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/ErrorOr.h"
71 #include "llvm/Transforms/IPO.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <functional>
79 #include <limits>
80 #include <map>
81 #include <memory>
82 #include <string>
83 #include <system_error>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 using namespace sampleprof;
90 #define DEBUG_TYPE "sample-profile"
91 
92 // Command line option to specify the file to read samples from. This is
93 // mainly used for debugging.
95  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
96  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
97 
98 // The named file contains a set of transformations that may have been applied
99 // to the symbol names between the program from which the sample data was
100 // collected and the current program's symbols.
102  "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
103  cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
104 
106  "sample-profile-max-propagate-iterations", cl::init(100),
107  cl::desc("Maximum number of iterations to go through when propagating "
108  "sample block/edge weights through the CFG."));
109 
111  "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
112  cl::desc("Emit a warning if less than N% of records in the input profile "
113  "are matched to the IR."));
114 
116  "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
117  cl::desc("Emit a warning if less than N% of samples in the input profile "
118  "are matched to the IR."));
119 
121  "no-warn-sample-unused", cl::init(false), cl::Hidden,
122  cl::desc("Use this option to turn off/on warnings about function with "
123  "samples but without debug information to use those samples. "));
124 
126  "profile-sample-accurate", cl::Hidden, cl::init(false),
127  cl::desc("If the sample profile is accurate, we will mark all un-sampled "
128  "callsite and function as having 0 samples. Otherwise, treat "
129  "un-sampled callsites and functions conservatively as unknown. "));
130 
131 namespace {
132 
133 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
134 using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
135 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
136 using EdgeWeightMap = DenseMap<Edge, uint64_t>;
137 using BlockEdgeMap =
139 
140 class SampleCoverageTracker {
141 public:
142  SampleCoverageTracker() = default;
143 
144  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
145  uint32_t Discriminator, uint64_t Samples);
146  unsigned computeCoverage(unsigned Used, unsigned Total) const;
147  unsigned countUsedRecords(const FunctionSamples *FS,
148  ProfileSummaryInfo *PSI) const;
149  unsigned countBodyRecords(const FunctionSamples *FS,
150  ProfileSummaryInfo *PSI) const;
151  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
152  uint64_t countBodySamples(const FunctionSamples *FS,
153  ProfileSummaryInfo *PSI) const;
154 
155  void clear() {
156  SampleCoverage.clear();
157  TotalUsedSamples = 0;
158  }
159 
160 private:
161  using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
162  using FunctionSamplesCoverageMap =
164 
165  /// Coverage map for sampling records.
166  ///
167  /// This map keeps a record of sampling records that have been matched to
168  /// an IR instruction. This is used to detect some form of staleness in
169  /// profiles (see flag -sample-profile-check-coverage).
170  ///
171  /// Each entry in the map corresponds to a FunctionSamples instance. This is
172  /// another map that counts how many times the sample record at the
173  /// given location has been used.
174  FunctionSamplesCoverageMap SampleCoverage;
175 
176  /// Number of samples used from the profile.
177  ///
178  /// When a sampling record is used for the first time, the samples from
179  /// that record are added to this accumulator. Coverage is later computed
180  /// based on the total number of samples available in this function and
181  /// its callsites.
182  ///
183  /// Note that this accumulator tracks samples used from a single function
184  /// and all the inlined callsites. Strictly, we should have a map of counters
185  /// keyed by FunctionSamples pointers, but these stats are cleared after
186  /// every function, so we just need to keep a single counter.
187  uint64_t TotalUsedSamples = 0;
188 };
189 
190 /// Sample profile pass.
191 ///
192 /// This pass reads profile data from the file specified by
193 /// -sample-profile-file and annotates every affected function with the
194 /// profile information found in that file.
195 class SampleProfileLoader {
196 public:
197  SampleProfileLoader(
198  StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
199  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
200  std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
201  : GetAC(std::move(GetAssumptionCache)),
202  GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
203  RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
204 
205  bool doInitialization(Module &M);
206  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
207  ProfileSummaryInfo *_PSI);
208 
209  void dump() { Reader->dump(); }
210 
211 protected:
213  unsigned getFunctionLoc(Function &F);
214  bool emitAnnotations(Function &F);
215  ErrorOr<uint64_t> getInstWeight(const Instruction &I);
216  ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
217  const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
218  std::vector<const FunctionSamples *>
219  findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
220  mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
221  const FunctionSamples *findFunctionSamples(const Instruction &I) const;
222  bool inlineCallInstruction(Instruction *I);
223  bool inlineHotFunctions(Function &F,
224  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
225  void printEdgeWeight(raw_ostream &OS, Edge E);
226  void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
227  void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
228  bool computeBlockWeights(Function &F);
229  void findEquivalenceClasses(Function &F);
230  template <bool IsPostDom>
231  void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
233 
234  void propagateWeights(Function &F);
235  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
236  void buildEdges(Function &F);
237  bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
238  void computeDominanceAndLoopInfo(Function &F);
239  void clearFunctionData();
240 
241  /// Map basic blocks to their computed weights.
242  ///
243  /// The weight of a basic block is defined to be the maximum
244  /// of all the instruction weights in that block.
245  BlockWeightMap BlockWeights;
246 
247  /// Map edges to their computed weights.
248  ///
249  /// Edge weights are computed by propagating basic block weights in
250  /// SampleProfile::propagateWeights.
251  EdgeWeightMap EdgeWeights;
252 
253  /// Set of visited blocks during propagation.
255 
256  /// Set of visited edges during propagation.
257  SmallSet<Edge, 32> VisitedEdges;
258 
259  /// Equivalence classes for block weights.
260  ///
261  /// Two blocks BB1 and BB2 are in the same equivalence class if they
262  /// dominate and post-dominate each other, and they are in the same loop
263  /// nest. When this happens, the two blocks are guaranteed to execute
264  /// the same number of times.
265  EquivalenceClassMap EquivalenceClass;
266 
267  /// Map from function name to Function *. Used to find the function from
268  /// the function name. If the function name contains suffix, additional
269  /// entry is added to map from the stripped name to the function if there
270  /// is one-to-one mapping.
272 
273  /// Dominance, post-dominance and loop information.
274  std::unique_ptr<DominatorTree> DT;
275  std::unique_ptr<PostDominatorTree> PDT;
276  std::unique_ptr<LoopInfo> LI;
277 
278  std::function<AssumptionCache &(Function &)> GetAC;
279  std::function<TargetTransformInfo &(Function &)> GetTTI;
280 
281  /// Predecessors for each basic block in the CFG.
282  BlockEdgeMap Predecessors;
283 
284  /// Successors for each basic block in the CFG.
285  BlockEdgeMap Successors;
286 
287  SampleCoverageTracker CoverageTracker;
288 
289  /// Profile reader object.
290  std::unique_ptr<SampleProfileReader> Reader;
291 
292  /// Samples collected for the body of this function.
293  FunctionSamples *Samples = nullptr;
294 
295  /// Name of the profile file to load.
296  std::string Filename;
297 
298  /// Name of the profile remapping file to load.
299  std::string RemappingFilename;
300 
301  /// Flag indicating whether the profile input loaded successfully.
302  bool ProfileIsValid = false;
303 
304  /// Flag indicating if the pass is invoked in ThinLTO compile phase.
305  ///
306  /// In this phase, in annotation, we should not promote indirect calls.
307  /// Instead, we will mark GUIDs that needs to be annotated to the function.
308  bool IsThinLTOPreLink;
309 
310  /// Profile Summary Info computed from sample profile.
311  ProfileSummaryInfo *PSI = nullptr;
312 
313  /// Total number of samples collected in this profile.
314  ///
315  /// This is the sum of all the samples collected in all the functions executed
316  /// at runtime.
317  uint64_t TotalCollectedSamples = 0;
318 
319  /// Optimization Remark Emitter used to emit diagnostic remarks.
320  OptimizationRemarkEmitter *ORE = nullptr;
321 
322  // Information recorded when we declined to inline a call site
323  // because we have determined it is too cold is accumulated for
324  // each callee function. Initially this is just the entry count.
325  struct NotInlinedProfileInfo {
326  uint64_t entryCount;
327  };
329 };
330 
331 class SampleProfileLoaderLegacyPass : public ModulePass {
332 public:
333  // Class identification, replacement for typeinfo
334  static char ID;
335 
336  SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
337  bool IsThinLTOPreLink = false)
338  : ModulePass(ID),
339  SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
340  [&](Function &F) -> AssumptionCache & {
341  return ACT->getAssumptionCache(F);
342  },
343  [&](Function &F) -> TargetTransformInfo & {
344  return TTIWP->getTTI(F);
345  }) {
348  }
349 
350  void dump() { SampleLoader.dump(); }
351 
352  bool doInitialization(Module &M) override {
353  return SampleLoader.doInitialization(M);
354  }
355 
356  StringRef getPassName() const override { return "Sample profile pass"; }
357  bool runOnModule(Module &M) override;
358 
359  void getAnalysisUsage(AnalysisUsage &AU) const override {
363  }
364 
365 private:
366  SampleProfileLoader SampleLoader;
367  AssumptionCacheTracker *ACT = nullptr;
368  TargetTransformInfoWrapperPass *TTIWP = nullptr;
369 };
370 
371 } // end anonymous namespace
372 
373 /// Return true if the given callsite is hot wrt to hot cutoff threshold.
374 ///
375 /// Functions that were inlined in the original binary will be represented
376 /// in the inline stack in the sample profile. If the profile shows that
377 /// the original inline decision was "good" (i.e., the callsite is executed
378 /// frequently), then we will recreate the inline decision and apply the
379 /// profile from the inlined callsite.
380 ///
381 /// To decide whether an inlined callsite is hot, we compare the callsite
382 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
383 /// regarded as hot if the count is above the cutoff value.
384 static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
385  ProfileSummaryInfo *PSI) {
386  if (!CallsiteFS)
387  return false; // The callsite was not inlined in the original binary.
388 
389  assert(PSI && "PSI is expected to be non null");
390  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
391  return PSI->isHotCount(CallsiteTotalSamples);
392 }
393 
394 /// Mark as used the sample record for the given function samples at
395 /// (LineOffset, Discriminator).
396 ///
397 /// \returns true if this is the first time we mark the given record.
398 bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
399  uint32_t LineOffset,
400  uint32_t Discriminator,
401  uint64_t Samples) {
402  LineLocation Loc(LineOffset, Discriminator);
403  unsigned &Count = SampleCoverage[FS][Loc];
404  bool FirstTime = (++Count == 1);
405  if (FirstTime)
406  TotalUsedSamples += Samples;
407  return FirstTime;
408 }
409 
410 /// Return the number of sample records that were applied from this profile.
411 ///
412 /// This count does not include records from cold inlined callsites.
413 unsigned
414 SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
415  ProfileSummaryInfo *PSI) const {
416  auto I = SampleCoverage.find(FS);
417 
418  // The size of the coverage map for FS represents the number of records
419  // that were marked used at least once.
420  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
421 
422  // If there are inlined callsites in this function, count the samples found
423  // in the respective bodies. However, do not bother counting callees with 0
424  // total samples, these are callees that were never invoked at runtime.
425  for (const auto &I : FS->getCallsiteSamples())
426  for (const auto &J : I.second) {
427  const FunctionSamples *CalleeSamples = &J.second;
428  if (callsiteIsHot(CalleeSamples, PSI))
429  Count += countUsedRecords(CalleeSamples, PSI);
430  }
431 
432  return Count;
433 }
434 
435 /// Return the number of sample records in the body of this profile.
436 ///
437 /// This count does not include records from cold inlined callsites.
438 unsigned
439 SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
440  ProfileSummaryInfo *PSI) const {
441  unsigned Count = FS->getBodySamples().size();
442 
443  // Only count records in hot callsites.
444  for (const auto &I : FS->getCallsiteSamples())
445  for (const auto &J : I.second) {
446  const FunctionSamples *CalleeSamples = &J.second;
447  if (callsiteIsHot(CalleeSamples, PSI))
448  Count += countBodyRecords(CalleeSamples, PSI);
449  }
450 
451  return Count;
452 }
453 
454 /// Return the number of samples collected in the body of this profile.
455 ///
456 /// This count does not include samples from cold inlined callsites.
457 uint64_t
458 SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
459  ProfileSummaryInfo *PSI) const {
460  uint64_t Total = 0;
461  for (const auto &I : FS->getBodySamples())
462  Total += I.second.getSamples();
463 
464  // Only count samples in hot callsites.
465  for (const auto &I : FS->getCallsiteSamples())
466  for (const auto &J : I.second) {
467  const FunctionSamples *CalleeSamples = &J.second;
468  if (callsiteIsHot(CalleeSamples, PSI))
469  Total += countBodySamples(CalleeSamples, PSI);
470  }
471 
472  return Total;
473 }
474 
475 /// Return the fraction of sample records used in this profile.
476 ///
477 /// The returned value is an unsigned integer in the range 0-100 indicating
478 /// the percentage of sample records that were used while applying this
479 /// profile to the associated function.
480 unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
481  unsigned Total) const {
482  assert(Used <= Total &&
483  "number of used records cannot exceed the total number of records");
484  return Total > 0 ? Used * 100 / Total : 100;
485 }
486 
487 /// Clear all the per-function data used to load samples and propagate weights.
488 void SampleProfileLoader::clearFunctionData() {
489  BlockWeights.clear();
490  EdgeWeights.clear();
491  VisitedBlocks.clear();
492  VisitedEdges.clear();
493  EquivalenceClass.clear();
494  DT = nullptr;
495  PDT = nullptr;
496  LI = nullptr;
497  Predecessors.clear();
498  Successors.clear();
499  CoverageTracker.clear();
500 }
501 
502 #ifndef NDEBUG
503 /// Print the weight of edge \p E on stream \p OS.
504 ///
505 /// \param OS Stream to emit the output to.
506 /// \param E Edge to print.
507 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
508  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
509  << "]: " << EdgeWeights[E] << "\n";
510 }
511 
512 /// Print the equivalence class of block \p BB on stream \p OS.
513 ///
514 /// \param OS Stream to emit the output to.
515 /// \param BB Block to print.
516 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
517  const BasicBlock *BB) {
518  const BasicBlock *Equiv = EquivalenceClass[BB];
519  OS << "equivalence[" << BB->getName()
520  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
521 }
522 
523 /// Print the weight of block \p BB on stream \p OS.
524 ///
525 /// \param OS Stream to emit the output to.
526 /// \param BB Block to print.
527 void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
528  const BasicBlock *BB) const {
529  const auto &I = BlockWeights.find(BB);
530  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
531  OS << "weight[" << BB->getName() << "]: " << W << "\n";
532 }
533 #endif
534 
535 /// Get the weight for an instruction.
536 ///
537 /// The "weight" of an instruction \p Inst is the number of samples
538 /// collected on that instruction at runtime. To retrieve it, we
539 /// need to compute the line number of \p Inst relative to the start of its
540 /// function. We use HeaderLineno to compute the offset. We then
541 /// look up the samples collected for \p Inst using BodySamples.
542 ///
543 /// \param Inst Instruction to query.
544 ///
545 /// \returns the weight of \p Inst.
546 ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
547  const DebugLoc &DLoc = Inst.getDebugLoc();
548  if (!DLoc)
549  return std::error_code();
550 
551  const FunctionSamples *FS = findFunctionSamples(Inst);
552  if (!FS)
553  return std::error_code();
554 
555  // Ignore all intrinsics, phinodes and branch instructions.
556  // Branch and phinodes instruction usually contains debug info from sources outside of
557  // the residing basic block, thus we ignore them during annotation.
558  if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
559  return std::error_code();
560 
561  // If a direct call/invoke instruction is inlined in profile
562  // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
563  // it means that the inlined callsite has no sample, thus the call
564  // instruction should have 0 count.
565  if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
566  !ImmutableCallSite(&Inst).isIndirectCall() &&
567  findCalleeFunctionSamples(Inst))
568  return 0;
569 
570  const DILocation *DIL = DLoc;
571  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
572  uint32_t Discriminator = DIL->getBaseDiscriminator();
573  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
574  if (R) {
575  bool FirstMark =
576  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
577  if (FirstMark) {
578  ORE->emit([&]() {
579  OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
580  Remark << "Applied " << ore::NV("NumSamples", *R);
581  Remark << " samples from profile (offset: ";
582  Remark << ore::NV("LineOffset", LineOffset);
583  if (Discriminator) {
584  Remark << ".";
585  Remark << ore::NV("Discriminator", Discriminator);
586  }
587  Remark << ")";
588  return Remark;
589  });
590  }
591  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
592  << DIL->getBaseDiscriminator() << ":" << Inst
593  << " (line offset: " << LineOffset << "."
594  << DIL->getBaseDiscriminator() << " - weight: " << R.get()
595  << ")\n");
596  }
597  return R;
598 }
599 
600 /// Compute the weight of a basic block.
601 ///
602 /// The weight of basic block \p BB is the maximum weight of all the
603 /// instructions in BB.
604 ///
605 /// \param BB The basic block to query.
606 ///
607 /// \returns the weight for \p BB.
608 ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
609  uint64_t Max = 0;
610  bool HasWeight = false;
611  for (auto &I : BB->getInstList()) {
612  const ErrorOr<uint64_t> &R = getInstWeight(I);
613  if (R) {
614  Max = std::max(Max, R.get());
615  HasWeight = true;
616  }
617  }
618  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
619 }
620 
621 /// Compute and store the weights of every basic block.
622 ///
623 /// This populates the BlockWeights map by computing
624 /// the weights of every basic block in the CFG.
625 ///
626 /// \param F The function to query.
627 bool SampleProfileLoader::computeBlockWeights(Function &F) {
628  bool Changed = false;
629  LLVM_DEBUG(dbgs() << "Block weights\n");
630  for (const auto &BB : F) {
631  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
632  if (Weight) {
633  BlockWeights[&BB] = Weight.get();
634  VisitedBlocks.insert(&BB);
635  Changed = true;
636  }
637  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
638  }
639 
640  return Changed;
641 }
642 
643 /// Get the FunctionSamples for a call instruction.
644 ///
645 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
646 /// instance in which that call instruction is calling to. It contains
647 /// all samples that resides in the inlined instance. We first find the
648 /// inlined instance in which the call instruction is from, then we
649 /// traverse its children to find the callsite with the matching
650 /// location.
651 ///
652 /// \param Inst Call/Invoke instruction to query.
653 ///
654 /// \returns The FunctionSamples pointer to the inlined instance.
655 const FunctionSamples *
656 SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
657  const DILocation *DIL = Inst.getDebugLoc();
658  if (!DIL) {
659  return nullptr;
660  }
661 
662  StringRef CalleeName;
663  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
664  if (Function *Callee = CI->getCalledFunction())
665  CalleeName = Callee->getName();
666 
667  const FunctionSamples *FS = findFunctionSamples(Inst);
668  if (FS == nullptr)
669  return nullptr;
670 
672  DIL->getBaseDiscriminator()),
673  CalleeName);
674 }
675 
676 /// Returns a vector of FunctionSamples that are the indirect call targets
677 /// of \p Inst. The vector is sorted by the total number of samples. Stores
678 /// the total call count of the indirect call in \p Sum.
679 std::vector<const FunctionSamples *>
680 SampleProfileLoader::findIndirectCallFunctionSamples(
681  const Instruction &Inst, uint64_t &Sum) const {
682  const DILocation *DIL = Inst.getDebugLoc();
683  std::vector<const FunctionSamples *> R;
684 
685  if (!DIL) {
686  return R;
687  }
688 
689  const FunctionSamples *FS = findFunctionSamples(Inst);
690  if (FS == nullptr)
691  return R;
692 
693  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
694  uint32_t Discriminator = DIL->getBaseDiscriminator();
695 
696  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
697  Sum = 0;
698  if (T)
699  for (const auto &T_C : T.get())
700  Sum += T_C.second;
703  if (M->empty())
704  return R;
705  for (const auto &NameFS : *M) {
706  Sum += NameFS.second.getEntrySamples();
707  R.push_back(&NameFS.second);
708  }
709  llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
710  if (L->getEntrySamples() != R->getEntrySamples())
711  return L->getEntrySamples() > R->getEntrySamples();
712  return FunctionSamples::getGUID(L->getName()) <
714  });
715  }
716  return R;
717 }
718 
719 /// Get the FunctionSamples for an instruction.
720 ///
721 /// The FunctionSamples of an instruction \p Inst is the inlined instance
722 /// in which that instruction is coming from. We traverse the inline stack
723 /// of that instruction, and match it with the tree nodes in the profile.
724 ///
725 /// \param Inst Instruction to query.
726 ///
727 /// \returns the FunctionSamples pointer to the inlined instance.
728 const FunctionSamples *
729 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
730  const DILocation *DIL = Inst.getDebugLoc();
731  if (!DIL)
732  return Samples;
733 
734  auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
735  if (it.second)
736  it.first->second = Samples->findFunctionSamples(DIL);
737  return it.first->second;
738 }
739 
740 bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
741  assert(isa<CallInst>(I) || isa<InvokeInst>(I));
742  CallSite CS(I);
743  Function *CalledFunction = CS.getCalledFunction();
744  assert(CalledFunction);
745  DebugLoc DLoc = I->getDebugLoc();
746  BasicBlock *BB = I->getParent();
747  InlineParams Params = getInlineParams();
748  Params.ComputeFullInlineCost = true;
749  // Checks if there is anything in the reachable portion of the callee at
750  // this callsite that makes this inlining potentially illegal. Need to
751  // set ComputeFullInlineCost, otherwise getInlineCost may return early
752  // when cost exceeds threshold without checking all IRs in the callee.
753  // The acutal cost does not matter because we only checks isNever() to
754  // see if it is legal to inline the callsite.
755  InlineCost Cost =
756  getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
757  None, nullptr, nullptr);
758  if (Cost.isNever()) {
759  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
760  << "incompatible inlining");
761  return false;
762  }
763  InlineFunctionInfo IFI(nullptr, &GetAC);
764  if (InlineFunction(CS, IFI)) {
765  // The call to InlineFunction erases I, so we can't pass it here.
766  ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
767  << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
768  << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
769  return true;
770  }
771  return false;
772 }
773 
774 /// Iteratively inline hot callsites of a function.
775 ///
776 /// Iteratively traverse all callsites of the function \p F, and find if
777 /// the corresponding inlined instance exists and is hot in profile. If
778 /// it is hot enough, inline the callsites and adds new callsites of the
779 /// callee into the caller. If the call is an indirect call, first promote
780 /// it to direct call. Each indirect call is limited with a single target.
781 ///
782 /// \param F function to perform iterative inlining.
783 /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
784 /// inlined in the profiled binary.
785 ///
786 /// \returns True if there is any inline happened.
787 bool SampleProfileLoader::inlineHotFunctions(
788  Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
789  DenseSet<Instruction *> PromotedInsns;
790 
791  DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
792  bool Changed = false;
793  while (true) {
794  bool LocalChanged = false;
796  for (auto &BB : F) {
797  bool Hot = false;
799  for (auto &I : BB.getInstList()) {
800  const FunctionSamples *FS = nullptr;
801  if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
802  !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
803  Candidates.push_back(&I);
804  if (FS->getEntrySamples() > 0)
805  localNotInlinedCallSites.try_emplace(&I, FS);
806  if (callsiteIsHot(FS, PSI))
807  Hot = true;
808  }
809  }
810  if (Hot) {
811  CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
812  }
813  }
814  for (auto I : CIS) {
815  Function *CalledFunction = CallSite(I).getCalledFunction();
816  // Do not inline recursive calls.
817  if (CalledFunction == &F)
818  continue;
819  if (CallSite(I).isIndirectCall()) {
820  if (PromotedInsns.count(I))
821  continue;
822  uint64_t Sum;
823  for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
824  if (IsThinLTOPreLink) {
825  FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
827  continue;
828  }
829  auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
830  // If it is a recursive call, we do not inline it as it could bloat
831  // the code exponentially. There is way to better handle this, e.g.
832  // clone the caller first, and inline the cloned caller if it is
833  // recursive. As llvm does not inline recursive calls, we will
834  // simply ignore it instead of handling it explicitly.
835  if (CalleeFunctionName == F.getName())
836  continue;
837 
838  if (!callsiteIsHot(FS, PSI))
839  continue;
840 
841  const char *Reason = "Callee function not available";
842  auto R = SymbolMap.find(CalleeFunctionName);
843  if (R != SymbolMap.end() && R->getValue() &&
844  !R->getValue()->isDeclaration() &&
845  R->getValue()->getSubprogram() &&
846  isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
847  uint64_t C = FS->getEntrySamples();
848  Instruction *DI =
849  pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
850  Sum -= C;
851  PromotedInsns.insert(I);
852  // If profile mismatches, we should not attempt to inline DI.
853  if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
854  inlineCallInstruction(DI)) {
855  localNotInlinedCallSites.erase(I);
856  LocalChanged = true;
857  }
858  } else {
859  LLVM_DEBUG(dbgs()
860  << "\nFailed to promote indirect call to "
861  << CalleeFunctionName << " because " << Reason << "\n");
862  }
863  }
864  } else if (CalledFunction && CalledFunction->getSubprogram() &&
865  !CalledFunction->isDeclaration()) {
866  if (inlineCallInstruction(I)) {
867  localNotInlinedCallSites.erase(I);
868  LocalChanged = true;
869  }
870  } else if (IsThinLTOPreLink) {
871  findCalleeFunctionSamples(*I)->findInlinedFunctions(
872  InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
873  }
874  }
875  if (LocalChanged) {
876  Changed = true;
877  } else {
878  break;
879  }
880  }
881 
882  // Accumulate not inlined callsite information into notInlinedSamples
883  for (const auto &Pair : localNotInlinedCallSites) {
884  Instruction *I = Pair.getFirst();
886  if (!Callee || Callee->isDeclaration())
887  continue;
888  const FunctionSamples *FS = Pair.getSecond();
889  auto pair =
890  notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
891  pair.first->second.entryCount += FS->getEntrySamples();
892  }
893  return Changed;
894 }
895 
896 /// Find equivalence classes for the given block.
897 ///
898 /// This finds all the blocks that are guaranteed to execute the same
899 /// number of times as \p BB1. To do this, it traverses all the
900 /// descendants of \p BB1 in the dominator or post-dominator tree.
901 ///
902 /// A block BB2 will be in the same equivalence class as \p BB1 if
903 /// the following holds:
904 ///
905 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
906 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
907 /// dominate BB1 in the post-dominator tree.
908 ///
909 /// 2- Both BB2 and \p BB1 must be in the same loop.
910 ///
911 /// For every block BB2 that meets those two requirements, we set BB2's
912 /// equivalence class to \p BB1.
913 ///
914 /// \param BB1 Block to check.
915 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
916 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
917 /// with blocks from \p BB1's dominator tree, then
918 /// this is the post-dominator tree, and vice versa.
919 template <bool IsPostDom>
920 void SampleProfileLoader::findEquivalencesFor(
921  BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
923  const BasicBlock *EC = EquivalenceClass[BB1];
924  uint64_t Weight = BlockWeights[EC];
925  for (const auto *BB2 : Descendants) {
926  bool IsDomParent = DomTree->dominates(BB2, BB1);
927  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
928  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
929  EquivalenceClass[BB2] = EC;
930  // If BB2 is visited, then the entire EC should be marked as visited.
931  if (VisitedBlocks.count(BB2)) {
932  VisitedBlocks.insert(EC);
933  }
934 
935  // If BB2 is heavier than BB1, make BB2 have the same weight
936  // as BB1.
937  //
938  // Note that we don't worry about the opposite situation here
939  // (when BB2 is lighter than BB1). We will deal with this
940  // during the propagation phase. Right now, we just want to
941  // make sure that BB1 has the largest weight of all the
942  // members of its equivalence set.
943  Weight = std::max(Weight, BlockWeights[BB2]);
944  }
945  }
946  if (EC == &EC->getParent()->getEntryBlock()) {
947  BlockWeights[EC] = Samples->getHeadSamples() + 1;
948  } else {
949  BlockWeights[EC] = Weight;
950  }
951 }
952 
953 /// Find equivalence classes.
954 ///
955 /// Since samples may be missing from blocks, we can fill in the gaps by setting
956 /// the weights of all the blocks in the same equivalence class to the same
957 /// weight. To compute the concept of equivalence, we use dominance and loop
958 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
959 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
960 ///
961 /// \param F The function to query.
962 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
963  SmallVector<BasicBlock *, 8> DominatedBBs;
964  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
965  // Find equivalence sets based on dominance and post-dominance information.
966  for (auto &BB : F) {
967  BasicBlock *BB1 = &BB;
968 
969  // Compute BB1's equivalence class once.
970  if (EquivalenceClass.count(BB1)) {
971  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
972  continue;
973  }
974 
975  // By default, blocks are in their own equivalence class.
976  EquivalenceClass[BB1] = BB1;
977 
978  // Traverse all the blocks dominated by BB1. We are looking for
979  // every basic block BB2 such that:
980  //
981  // 1- BB1 dominates BB2.
982  // 2- BB2 post-dominates BB1.
983  // 3- BB1 and BB2 are in the same loop nest.
984  //
985  // If all those conditions hold, it means that BB2 is executed
986  // as many times as BB1, so they are placed in the same equivalence
987  // class by making BB2's equivalence class be BB1.
988  DominatedBBs.clear();
989  DT->getDescendants(BB1, DominatedBBs);
990  findEquivalencesFor(BB1, DominatedBBs, PDT.get());
991 
992  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
993  }
994 
995  // Assign weights to equivalence classes.
996  //
997  // All the basic blocks in the same equivalence class will execute
998  // the same number of times. Since we know that the head block in
999  // each equivalence class has the largest weight, assign that weight
1000  // to all the blocks in that equivalence class.
1001  LLVM_DEBUG(
1002  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
1003  for (auto &BI : F) {
1004  const BasicBlock *BB = &BI;
1005  const BasicBlock *EquivBB = EquivalenceClass[BB];
1006  if (BB != EquivBB)
1007  BlockWeights[BB] = BlockWeights[EquivBB];
1008  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
1009  }
1010 }
1011 
1012 /// Visit the given edge to decide if it has a valid weight.
1013 ///
1014 /// If \p E has not been visited before, we copy to \p UnknownEdge
1015 /// and increment the count of unknown edges.
1016 ///
1017 /// \param E Edge to visit.
1018 /// \param NumUnknownEdges Current number of unknown edges.
1019 /// \param UnknownEdge Set if E has not been visited before.
1020 ///
1021 /// \returns E's weight, if known. Otherwise, return 0.
1022 uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
1023  Edge *UnknownEdge) {
1024  if (!VisitedEdges.count(E)) {
1025  (*NumUnknownEdges)++;
1026  *UnknownEdge = E;
1027  return 0;
1028  }
1029 
1030  return EdgeWeights[E];
1031 }
1032 
1033 /// Propagate weights through incoming/outgoing edges.
1034 ///
1035 /// If the weight of a basic block is known, and there is only one edge
1036 /// with an unknown weight, we can calculate the weight of that edge.
1037 ///
1038 /// Similarly, if all the edges have a known count, we can calculate the
1039 /// count of the basic block, if needed.
1040 ///
1041 /// \param F Function to process.
1042 /// \param UpdateBlockCount Whether we should update basic block counts that
1043 /// has already been annotated.
1044 ///
1045 /// \returns True if new weights were assigned to edges or blocks.
1046 bool SampleProfileLoader::propagateThroughEdges(Function &F,
1047  bool UpdateBlockCount) {
1048  bool Changed = false;
1049  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
1050  for (const auto &BI : F) {
1051  const BasicBlock *BB = &BI;
1052  const BasicBlock *EC = EquivalenceClass[BB];
1053 
1054  // Visit all the predecessor and successor edges to determine
1055  // which ones have a weight assigned already. Note that it doesn't
1056  // matter that we only keep track of a single unknown edge. The
1057  // only case we are interested in handling is when only a single
1058  // edge is unknown (see setEdgeOrBlockWeight).
1059  for (unsigned i = 0; i < 2; i++) {
1060  uint64_t TotalWeight = 0;
1061  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1062  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1063 
1064  if (i == 0) {
1065  // First, visit all predecessor edges.
1066  NumTotalEdges = Predecessors[BB].size();
1067  for (auto *Pred : Predecessors[BB]) {
1068  Edge E = std::make_pair(Pred, BB);
1069  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1070  if (E.first == E.second)
1071  SelfReferentialEdge = E;
1072  }
1073  if (NumTotalEdges == 1) {
1074  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1075  }
1076  } else {
1077  // On the second round, visit all successor edges.
1078  NumTotalEdges = Successors[BB].size();
1079  for (auto *Succ : Successors[BB]) {
1080  Edge E = std::make_pair(BB, Succ);
1081  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1082  }
1083  if (NumTotalEdges == 1) {
1084  SingleEdge = std::make_pair(BB, Successors[BB][0]);
1085  }
1086  }
1087 
1088  // After visiting all the edges, there are three cases that we
1089  // can handle immediately:
1090  //
1091  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
1092  // In this case, we simply check that the sum of all the edges
1093  // is the same as BB's weight. If not, we change BB's weight
1094  // to match. Additionally, if BB had not been visited before,
1095  // we mark it visited.
1096  //
1097  // - Only one edge is unknown and BB has already been visited.
1098  // In this case, we can compute the weight of the edge by
1099  // subtracting the total block weight from all the known
1100  // edge weights. If the edges weight more than BB, then the
1101  // edge of the last remaining edge is set to zero.
1102  //
1103  // - There exists a self-referential edge and the weight of BB is
1104  // known. In this case, this edge can be based on BB's weight.
1105  // We add up all the other known edges and set the weight on
1106  // the self-referential edge as we did in the previous case.
1107  //
1108  // In any other case, we must continue iterating. Eventually,
1109  // all edges will get a weight, or iteration will stop when
1110  // it reaches SampleProfileMaxPropagateIterations.
1111  if (NumUnknownEdges <= 1) {
1112  uint64_t &BBWeight = BlockWeights[EC];
1113  if (NumUnknownEdges == 0) {
1114  if (!VisitedBlocks.count(EC)) {
1115  // If we already know the weight of all edges, the weight of the
1116  // basic block can be computed. It should be no larger than the sum
1117  // of all edge weights.
1118  if (TotalWeight > BBWeight) {
1119  BBWeight = TotalWeight;
1120  Changed = true;
1121  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
1122  << " known. Set weight for block: ";
1123  printBlockWeight(dbgs(), BB););
1124  }
1125  } else if (NumTotalEdges == 1 &&
1126  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1127  // If there is only one edge for the visited basic block, use the
1128  // block weight to adjust edge weight if edge weight is smaller.
1129  EdgeWeights[SingleEdge] = BlockWeights[EC];
1130  Changed = true;
1131  }
1132  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1133  // If there is a single unknown edge and the block has been
1134  // visited, then we can compute E's weight.
1135  if (BBWeight >= TotalWeight)
1136  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1137  else
1138  EdgeWeights[UnknownEdge] = 0;
1139  const BasicBlock *OtherEC;
1140  if (i == 0)
1141  OtherEC = EquivalenceClass[UnknownEdge.first];
1142  else
1143  OtherEC = EquivalenceClass[UnknownEdge.second];
1144  // Edge weights should never exceed the BB weights it connects.
1145  if (VisitedBlocks.count(OtherEC) &&
1146  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1147  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1148  VisitedEdges.insert(UnknownEdge);
1149  Changed = true;
1150  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
1151  printEdgeWeight(dbgs(), UnknownEdge));
1152  }
1153  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1154  // If a block Weights 0, all its in/out edges should weight 0.
1155  if (i == 0) {
1156  for (auto *Pred : Predecessors[BB]) {
1157  Edge E = std::make_pair(Pred, BB);
1158  EdgeWeights[E] = 0;
1159  VisitedEdges.insert(E);
1160  }
1161  } else {
1162  for (auto *Succ : Successors[BB]) {
1163  Edge E = std::make_pair(BB, Succ);
1164  EdgeWeights[E] = 0;
1165  VisitedEdges.insert(E);
1166  }
1167  }
1168  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1169  uint64_t &BBWeight = BlockWeights[BB];
1170  // We have a self-referential edge and the weight of BB is known.
1171  if (BBWeight >= TotalWeight)
1172  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1173  else
1174  EdgeWeights[SelfReferentialEdge] = 0;
1175  VisitedEdges.insert(SelfReferentialEdge);
1176  Changed = true;
1177  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
1178  printEdgeWeight(dbgs(), SelfReferentialEdge));
1179  }
1180  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1181  BlockWeights[EC] = TotalWeight;
1182  VisitedBlocks.insert(EC);
1183  Changed = true;
1184  }
1185  }
1186  }
1187 
1188  return Changed;
1189 }
1190 
1191 /// Build in/out edge lists for each basic block in the CFG.
1192 ///
1193 /// We are interested in unique edges. If a block B1 has multiple
1194 /// edges to another block B2, we only add a single B1->B2 edge.
1195 void SampleProfileLoader::buildEdges(Function &F) {
1196  for (auto &BI : F) {
1197  BasicBlock *B1 = &BI;
1198 
1199  // Add predecessors for B1.
1201  if (!Predecessors[B1].empty())
1202  llvm_unreachable("Found a stale predecessors list in a basic block.");
1203  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
1204  BasicBlock *B2 = *PI;
1205  if (Visited.insert(B2).second)
1206  Predecessors[B1].push_back(B2);
1207  }
1208 
1209  // Add successors for B1.
1210  Visited.clear();
1211  if (!Successors[B1].empty())
1212  llvm_unreachable("Found a stale successors list in a basic block.");
1213  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
1214  BasicBlock *B2 = *SI;
1215  if (Visited.insert(B2).second)
1216  Successors[B1].push_back(B2);
1217  }
1218  }
1219 }
1220 
1221 /// Returns the sorted CallTargetMap \p M by count in descending order.
1223  const SampleRecord::CallTargetMap &M) {
1225  for (auto I = M.begin(); I != M.end(); ++I)
1226  R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1227  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
1228  if (L.Count == R.Count)
1229  return L.Value > R.Value;
1230  else
1231  return L.Count > R.Count;
1232  });
1233  return R;
1234 }
1235 
1236 /// Propagate weights into edges
1237 ///
1238 /// The following rules are applied to every block BB in the CFG:
1239 ///
1240 /// - If BB has a single predecessor/successor, then the weight
1241 /// of that edge is the weight of the block.
1242 ///
1243 /// - If all incoming or outgoing edges are known except one, and the
1244 /// weight of the block is already known, the weight of the unknown
1245 /// edge will be the weight of the block minus the sum of all the known
1246 /// edges. If the sum of all the known edges is larger than BB's weight,
1247 /// we set the unknown edge weight to zero.
1248 ///
1249 /// - If there is a self-referential edge, and the weight of the block is
1250 /// known, the weight for that edge is set to the weight of the block
1251 /// minus the weight of the other incoming edges to that block (if
1252 /// known).
1253 void SampleProfileLoader::propagateWeights(Function &F) {
1254  bool Changed = true;
1255  unsigned I = 0;
1256 
1257  // If BB weight is larger than its corresponding loop's header BB weight,
1258  // use the BB weight to replace the loop header BB weight.
1259  for (auto &BI : F) {
1260  BasicBlock *BB = &BI;
1261  Loop *L = LI->getLoopFor(BB);
1262  if (!L) {
1263  continue;
1264  }
1265  BasicBlock *Header = L->getHeader();
1266  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1267  BlockWeights[Header] = BlockWeights[BB];
1268  }
1269  }
1270 
1271  // Before propagation starts, build, for each block, a list of
1272  // unique predecessors and successors. This is necessary to handle
1273  // identical edges in multiway branches. Since we visit all blocks and all
1274  // edges of the CFG, it is cleaner to build these lists once at the start
1275  // of the pass.
1276  buildEdges(F);
1277 
1278  // Propagate until we converge or we go past the iteration limit.
1279  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1280  Changed = propagateThroughEdges(F, false);
1281  }
1282 
1283  // The first propagation propagates BB counts from annotated BBs to unknown
1284  // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1285  // to propagate edge weights.
1286  VisitedEdges.clear();
1287  Changed = true;
1288  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1289  Changed = propagateThroughEdges(F, false);
1290  }
1291 
1292  // The 3rd propagation pass allows adjust annotated BB weights that are
1293  // obviously wrong.
1294  Changed = true;
1295  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1296  Changed = propagateThroughEdges(F, true);
1297  }
1298 
1299  // Generate MD_prof metadata for every branch instruction using the
1300  // edge weights computed during propagation.
1301  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1302  LLVMContext &Ctx = F.getContext();
1303  MDBuilder MDB(Ctx);
1304  for (auto &BI : F) {
1305  BasicBlock *BB = &BI;
1306 
1307  if (BlockWeights[BB]) {
1308  for (auto &I : BB->getInstList()) {
1309  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1310  continue;
1311  CallSite CS(&I);
1312  if (!CS.getCalledFunction()) {
1313  const DebugLoc &DLoc = I.getDebugLoc();
1314  if (!DLoc)
1315  continue;
1316  const DILocation *DIL = DLoc;
1317  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
1318  uint32_t Discriminator = DIL->getBaseDiscriminator();
1319 
1320  const FunctionSamples *FS = findFunctionSamples(I);
1321  if (!FS)
1322  continue;
1323  auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
1324  if (!T || T.get().empty())
1325  continue;
1326  SmallVector<InstrProfValueData, 2> SortedCallTargets =
1327  SortCallTargets(T.get());
1328  uint64_t Sum;
1329  findIndirectCallFunctionSamples(I, Sum);
1330  annotateValueSite(*I.getParent()->getParent()->getParent(), I,
1331  SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1332  SortedCallTargets.size());
1333  } else if (!isa<IntrinsicInst>(&I)) {
1334  I.setMetadata(LLVMContext::MD_prof,
1335  MDB.createBranchWeights(
1336  {static_cast<uint32_t>(BlockWeights[BB])}));
1337  }
1338  }
1339  }
1340  Instruction *TI = BB->getTerminator();
1341  if (TI->getNumSuccessors() == 1)
1342  continue;
1343  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1344  continue;
1345 
1346  DebugLoc BranchLoc = TI->getDebugLoc();
1347  LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1348  << ((BranchLoc) ? Twine(BranchLoc.getLine())
1349  : Twine("<UNKNOWN LOCATION>"))
1350  << ".\n");
1351  SmallVector<uint32_t, 4> Weights;
1352  uint32_t MaxWeight = 0;
1353  Instruction *MaxDestInst;
1354  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1355  BasicBlock *Succ = TI->getSuccessor(I);
1356  Edge E = std::make_pair(BB, Succ);
1357  uint64_t Weight = EdgeWeights[E];
1358  LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1359  // Use uint32_t saturated arithmetic to adjust the incoming weights,
1360  // if needed. Sample counts in profiles are 64-bit unsigned values,
1361  // but internally branch weights are expressed as 32-bit values.
1362  if (Weight > std::numeric_limits<uint32_t>::max()) {
1363  LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
1365  }
1366  // Weight is added by one to avoid propagation errors introduced by
1367  // 0 weights.
1368  Weights.push_back(static_cast<uint32_t>(Weight + 1));
1369  if (Weight != 0) {
1370  if (Weight > MaxWeight) {
1371  MaxWeight = Weight;
1372  MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1373  }
1374  }
1375  }
1376 
1377  uint64_t TempWeight;
1378  // Only set weights if there is at least one non-zero weight.
1379  // In any other case, let the analyzer set weights.
1380  // Do not set weights if the weights are present. In ThinLTO, the profile
1381  // annotation is done twice. If the first annotation already set the
1382  // weights, the second pass does not need to set it.
1383  if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
1384  LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1386  MDB.createBranchWeights(Weights));
1387  ORE->emit([&]() {
1388  return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1389  << "most popular destination for conditional branches at "
1390  << ore::NV("CondBranchesLoc", BranchLoc);
1391  });
1392  } else {
1393  LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1394  }
1395  }
1396 }
1397 
1398 /// Get the line number for the function header.
1399 ///
1400 /// This looks up function \p F in the current compilation unit and
1401 /// retrieves the line number where the function is defined. This is
1402 /// line 0 for all the samples read from the profile file. Every line
1403 /// number is relative to this line.
1404 ///
1405 /// \param F Function object to query.
1406 ///
1407 /// \returns the line number where \p F is defined. If it returns 0,
1408 /// it means that there is no debug information available for \p F.
1409 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
1410  if (DISubprogram *S = F.getSubprogram())
1411  return S->getLine();
1412 
1413  if (NoWarnSampleUnused)
1414  return 0;
1415 
1416  // If the start of \p F is missing, emit a diagnostic to inform the user
1417  // about the missed opportunity.
1419  "No debug information found in function " + F.getName() +
1420  ": Function profile not used",
1421  DS_Warning));
1422  return 0;
1423 }
1424 
1425 void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
1426  DT.reset(new DominatorTree);
1427  DT->recalculate(F);
1428 
1429  PDT.reset(new PostDominatorTree(F));
1430 
1431  LI.reset(new LoopInfo);
1432  LI->analyze(*DT);
1433 }
1434 
1435 /// Generate branch weight metadata for all branches in \p F.
1436 ///
1437 /// Branch weights are computed out of instruction samples using a
1438 /// propagation heuristic. Propagation proceeds in 3 phases:
1439 ///
1440 /// 1- Assignment of block weights. All the basic blocks in the function
1441 /// are initial assigned the same weight as their most frequently
1442 /// executed instruction.
1443 ///
1444 /// 2- Creation of equivalence classes. Since samples may be missing from
1445 /// blocks, we can fill in the gaps by setting the weights of all the
1446 /// blocks in the same equivalence class to the same weight. To compute
1447 /// the concept of equivalence, we use dominance and loop information.
1448 /// Two blocks B1 and B2 are in the same equivalence class if B1
1449 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
1450 ///
1451 /// 3- Propagation of block weights into edges. This uses a simple
1452 /// propagation heuristic. The following rules are applied to every
1453 /// block BB in the CFG:
1454 ///
1455 /// - If BB has a single predecessor/successor, then the weight
1456 /// of that edge is the weight of the block.
1457 ///
1458 /// - If all the edges are known except one, and the weight of the
1459 /// block is already known, the weight of the unknown edge will
1460 /// be the weight of the block minus the sum of all the known
1461 /// edges. If the sum of all the known edges is larger than BB's weight,
1462 /// we set the unknown edge weight to zero.
1463 ///
1464 /// - If there is a self-referential edge, and the weight of the block is
1465 /// known, the weight for that edge is set to the weight of the block
1466 /// minus the weight of the other incoming edges to that block (if
1467 /// known).
1468 ///
1469 /// Since this propagation is not guaranteed to finalize for every CFG, we
1470 /// only allow it to proceed for a limited number of iterations (controlled
1471 /// by -sample-profile-max-propagate-iterations).
1472 ///
1473 /// FIXME: Try to replace this propagation heuristic with a scheme
1474 /// that is guaranteed to finalize. A work-list approach similar to
1475 /// the standard value propagation algorithm used by SSA-CCP might
1476 /// work here.
1477 ///
1478 /// Once all the branch weights are computed, we emit the MD_prof
1479 /// metadata on BB using the computed values for each of its branches.
1480 ///
1481 /// \param F The function to query.
1482 ///
1483 /// \returns true if \p F was modified. Returns false, otherwise.
1484 bool SampleProfileLoader::emitAnnotations(Function &F) {
1485  bool Changed = false;
1486 
1487  if (getFunctionLoc(F) == 0)
1488  return false;
1489 
1490  LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1491  << F.getName() << ": " << getFunctionLoc(F) << "\n");
1492 
1493  DenseSet<GlobalValue::GUID> InlinedGUIDs;
1494  Changed |= inlineHotFunctions(F, InlinedGUIDs);
1495 
1496  // Compute basic block weights.
1497  Changed |= computeBlockWeights(F);
1498 
1499  if (Changed) {
1500  // Add an entry count to the function using the samples gathered at the
1501  // function entry.
1502  // Sets the GUIDs that are inlined in the profiled binary. This is used
1503  // for ThinLink to make correct liveness analysis, and also make the IR
1504  // match the profiled binary before annotation.
1505  F.setEntryCount(
1506  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1507  &InlinedGUIDs);
1508 
1509  // Compute dominance and loop info needed for propagation.
1510  computeDominanceAndLoopInfo(F);
1511 
1512  // Find equivalence classes.
1513  findEquivalenceClasses(F);
1514 
1515  // Propagate weights to all edges.
1516  propagateWeights(F);
1517  }
1518 
1519  // If coverage checking was requested, compute it now.
1521  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1522  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1523  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1524  if (Coverage < SampleProfileRecordCoverage) {
1526  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1527  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1528  Twine(Coverage) + "%) were applied",
1529  DS_Warning));
1530  }
1531  }
1532 
1534  uint64_t Used = CoverageTracker.getTotalUsedSamples();
1535  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1536  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1537  if (Coverage < SampleProfileSampleCoverage) {
1539  F.getSubprogram()->getFilename(), getFunctionLoc(F),
1540  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1541  Twine(Coverage) + "%) were applied",
1542  DS_Warning));
1543  }
1544  }
1545  return Changed;
1546 }
1547 
1549 
1550 INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
1551  "Sample Profile loader", false, false)
1555 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
1556  "Sample Profile loader", false, false)
1557 
1558 bool SampleProfileLoader::doInitialization(Module &M) {
1559  auto &Ctx = M.getContext();
1560  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
1561  if (std::error_code EC = ReaderOrErr.getError()) {
1562  std::string Msg = "Could not open profile: " + EC.message();
1563  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1564  return false;
1565  }
1566  Reader = std::move(ReaderOrErr.get());
1567  Reader->collectFuncsToUse(M);
1568  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1569 
1570  if (!RemappingFilename.empty()) {
1571  // Apply profile remappings to the loaded profile data if requested.
1572  // For now, we only support remapping symbols encoded using the Itanium
1573  // C++ ABI's name mangling scheme.
1575  RemappingFilename, Ctx, std::move(Reader));
1576  if (std::error_code EC = ReaderOrErr.getError()) {
1577  std::string Msg = "Could not open profile remapping file: " + EC.message();
1578  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1579  return false;
1580  }
1581  Reader = std::move(ReaderOrErr.get());
1582  ProfileIsValid = (Reader->read() == sampleprof_error::success);
1583  }
1584  return true;
1585 }
1586 
1588  return new SampleProfileLoaderLegacyPass();
1589 }
1590 
1592  return new SampleProfileLoaderLegacyPass(Name);
1593 }
1594 
1595 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
1596  ProfileSummaryInfo *_PSI) {
1598  if (!ProfileIsValid)
1599  return false;
1600 
1601  PSI = _PSI;
1602  if (M.getProfileSummary(/* IsCS */ false) == nullptr)
1603  M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
1605 
1606  // Compute the total number of samples collected in this profile.
1607  for (const auto &I : Reader->getProfiles())
1608  TotalCollectedSamples += I.second.getTotalSamples();
1609 
1610  // Populate the symbol map.
1611  for (const auto &N_F : M.getValueSymbolTable()) {
1612  StringRef OrigName = N_F.getKey();
1613  Function *F = dyn_cast<Function>(N_F.getValue());
1614  if (F == nullptr)
1615  continue;
1616  SymbolMap[OrigName] = F;
1617  auto pos = OrigName.find('.');
1618  if (pos != StringRef::npos) {
1619  StringRef NewName = OrigName.substr(0, pos);
1620  auto r = SymbolMap.insert(std::make_pair(NewName, F));
1621  // Failiing to insert means there is already an entry in SymbolMap,
1622  // thus there are multiple functions that are mapped to the same
1623  // stripped name. In this case of name conflicting, set the value
1624  // to nullptr to avoid confusion.
1625  if (!r.second)
1626  r.first->second = nullptr;
1627  }
1628  }
1629 
1630  bool retval = false;
1631  for (auto &F : M)
1632  if (!F.isDeclaration()) {
1633  clearFunctionData();
1634  retval |= runOnFunction(F, AM);
1635  }
1636 
1637  // Account for cold calls not inlined....
1638  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
1639  notInlinedCallInfo)
1640  updateProfileCallee(pair.first, pair.second.entryCount);
1641 
1642  return retval;
1643 }
1644 
1645 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
1646  ACT = &getAnalysis<AssumptionCacheTracker>();
1647  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1648  ProfileSummaryInfo *PSI =
1649  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1650  return SampleLoader.runOnModule(M, nullptr, PSI);
1651 }
1652 
1654 
1655  DILocation2SampleMap.clear();
1656  // By default the entry count is initialized to -1, which will be treated
1657  // conservatively by getEntryCount as the same as unknown (None). This is
1658  // to avoid newly added code to be treated as cold. If we have samples
1659  // this will be overwritten in emitAnnotations.
1660  // If ProfileSampleAccurate is true or F has profile-sample-accurate
1661  // attribute, initialize the entry count to 0 so callsites or functions
1662  // unsampled will be treated as cold.
1663  uint64_t initialEntryCount =
1664  (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
1665  ? 0
1666  : -1;
1667  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
1668  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1669  if (AM) {
1670  auto &FAM =
1672  .getManager();
1673  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1674  } else {
1675  OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
1676  ORE = OwnedORE.get();
1677  }
1678  Samples = Reader->getSamplesFor(F);
1679  if (Samples && !Samples->empty())
1680  return emitAnnotations(F);
1681  return false;
1682 }
1683 
1685  ModuleAnalysisManager &AM) {
1687  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1688 
1689  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
1690  return FAM.getResult<AssumptionAnalysis>(F);
1691  };
1692  auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
1693  return FAM.getResult<TargetIRAnalysis>(F);
1694  };
1695 
1696  SampleProfileLoader SampleLoader(
1697  ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1698  ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1699  : ProfileRemappingFileName,
1700  IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1701 
1702  SampleLoader.doInitialization(M);
1703 
1705  if (!SampleLoader.runOnModule(M, &AM, PSI))
1706  return PreservedAnalyses::all();
1707 
1708  return PreservedAnalyses::none();
1709 }
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
Definition: SampleProf.h:283
static uint64_t getGUID(StringRef Name)
Definition: SampleProf.h:515
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:153
Represents either an error or a value T.
Definition: ErrorOr.h:56
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Definition: Module.h:568
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
Metadata * getProfileSummary(bool IsCS)
Returns profile summary metadata.
Definition: Module.cpp:541
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
unsigned getLine() const
Definition: DebugLoc.cpp:25
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
Definition: SampleProf.h:256
A debug info location.
Definition: DebugLoc.h:33
F(f)
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:382
static SmallVector< InstrProfValueData, 2 > SortCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
bool isHotCount(uint64_t C)
Returns true if count C is considered hot.
Represents the cost of inlining a function.
Definition: InlineCost.h:63
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Representation of the samples collected for a function.
Definition: SampleProf.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Definition: BitVector.h:937
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1449
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
Diagnostic information for optimization analysis remarks.
static StringRef getName(Value *V)
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
StringRef getFilename() const
BlockT * getHeader() const
Definition: LoopInfo.h:102
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Subprogram description.
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
Definition: BasicBlock.cpp:203
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1336
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
Debug location.
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
Core dominator tree base class.
Definition: LoopInfo.h:63
const BasicBlock & getEntryBlock() const
Definition: Function.h:656
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1504
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:350
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:285
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
Definition: Core.h:48
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:932
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Class to represent profile counts.
Definition: Function.h:260
size_t size() const
Definition: SmallVector.h:52
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Definition: SampleProf.h:409
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:180
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1122
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
Definition: SampleProf.h:294
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:164
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
Definition: SampleProf.h:269
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:219
amdgpu Simplify well known AMD library false FunctionCallee Callee
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
#define DEBUG_TYPE
static const size_t npos
Definition: StringRef.h:50
iterator begin()
Definition: StringMap.h:314
Represents the relative location of an instruction.
Definition: SampleProf.h:117
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
static bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI)
Return true if the given callsite is hot wrt to hot cutoff threshold.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:467
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Establish a view to a call site for examination.
Definition: CallSite.h:897
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
iterator end()
Definition: DenseMap.h:108
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
std::map< std::string, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:208
StringRef getName() const
Return the function name.
Definition: SampleProf.h:406
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:227
Provides ErrorOr<T> smart pointer.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:320
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
Definition: SampleProf.h:353
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
Definition: SampleProf.h:331
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind)
Attach profile summary metadata to this module.
Definition: Module.cpp:534
#define LLVM_DEBUG(X)
Definition: Debug.h:122
The optimization diagnostic interface.
iterator end()
Definition: StringMap.h:317
This file provides the interface for the sampled PGO loader pass.
reference get()
Definition: ErrorOr.h:156
const BasicBlock * getParent() const
Definition: Instruction.h:66
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044