LLVM  14.0.0git
SampleProfileLoaderBaseImpl.h
Go to the documentation of this file.
1 ////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides the interface for the sampled PGO profile loader base
11 /// implementation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16 #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Module.h"
43 
44 namespace llvm {
45 using namespace sampleprof;
46 using namespace sampleprofutil;
48 
49 #define DEBUG_TYPE "sample-profile-impl"
50 
51 namespace afdo_detail {
52 
53 template <typename BlockT> struct IRTraits;
54 template <> struct IRTraits<BasicBlock> {
59  using LoopT = Loop;
60  using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
61  using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
63  using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
68  static Function &getFunction(Function &F) { return F; }
69  static const BasicBlock *getEntryBB(const Function *F) {
70  return &F->getEntryBlock();
71  }
74 };
75 
76 } // end namespace afdo_detail
77 
78 extern cl::opt<bool> SampleProfileUseProfi;
79 
80 template <typename BT> class SampleProfileLoaderBaseImpl {
81 public:
82  SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
83  : Filename(Name), RemappingFilename(RemapName) {}
84  void dump() { Reader->dump(); }
85 
88  using BlockFrequencyInfoT =
93  using DominatorTreePtrT =
95  using PostDominatorTreePtrT =
97  using PostDominatorTreeT =
99  using OptRemarkEmitterT =
101  using OptRemarkAnalysisT =
105 
107  using EquivalenceClassMap =
109  using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
111  using BlockEdgeMap =
113 
114 protected:
115  ~SampleProfileLoaderBaseImpl() = default;
116  friend class SampleCoverageTracker;
117 
120  }
123  }
126  }
129  }
130 
131  unsigned getFunctionLoc(FunctionT &Func);
132  virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
133  ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
134  ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
137  virtual const FunctionSamples *
138  findFunctionSamples(const InstructionT &I) const;
139  void printEdgeWeight(raw_ostream &OS, Edge E);
140  void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const;
141  void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB);
142  bool computeBlockWeights(FunctionT &F);
143  void findEquivalenceClasses(FunctionT &F);
144  void findEquivalencesFor(BasicBlockT *BB1,
145  ArrayRef<BasicBlockT *> Descendants,
146  PostDominatorTreeT *DomTree);
147  void propagateWeights(FunctionT &F);
148  void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
149  BlockWeightMap &SampleBlockWeights,
150  BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
151  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
152  void buildEdges(FunctionT &F);
153  bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
154  void clearFunctionData(bool ResetDT = true);
155  void computeDominanceAndLoopInfo(FunctionT &F);
156  bool
157  computeAndPropagateWeights(FunctionT &F,
158  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
159  void initWeightPropagation(FunctionT &F,
160  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
161  void
162  finalizeWeightPropagation(FunctionT &F,
163  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
164  void emitCoverageRemarks(FunctionT &F);
165 
166  /// Map basic blocks to their computed weights.
167  ///
168  /// The weight of a basic block is defined to be the maximum
169  /// of all the instruction weights in that block.
171 
172  /// Map edges to their computed weights.
173  ///
174  /// Edge weights are computed by propagating basic block weights in
175  /// SampleProfile::propagateWeights.
177 
178  /// Set of visited blocks during propagation.
180 
181  /// Set of visited edges during propagation.
183 
184  /// Equivalence classes for block weights.
185  ///
186  /// Two blocks BB1 and BB2 are in the same equivalence class if they
187  /// dominate and post-dominate each other, and they are in the same loop
188  /// nest. When this happens, the two blocks are guaranteed to execute
189  /// the same number of times.
191 
192  /// Dominance, post-dominance and loop information.
196 
197  /// Predecessors for each basic block in the CFG.
199 
200  /// Successors for each basic block in the CFG.
202 
203  /// Profile coverage tracker.
205 
206  /// Profile reader object.
207  std::unique_ptr<SampleProfileReader> Reader;
208 
209  /// Samples collected for the body of this function.
210  FunctionSamples *Samples = nullptr;
211 
212  /// Name of the profile file to load.
213  std::string Filename;
214 
215  /// Name of the profile remapping file to load.
216  std::string RemappingFilename;
217 
218  /// Profile Summary Info computed from sample profile.
219  ProfileSummaryInfo *PSI = nullptr;
220 
221  /// Optimization Remark Emitter used to emit diagnostic remarks.
222  OptRemarkEmitterT *ORE = nullptr;
223 };
224 
225 /// Clear all the per-function data used to load samples and propagate weights.
226 template <typename BT>
228  BlockWeights.clear();
229  EdgeWeights.clear();
230  VisitedBlocks.clear();
231  VisitedEdges.clear();
232  EquivalenceClass.clear();
233  if (ResetDT) {
234  DT = nullptr;
235  PDT = nullptr;
236  LI = nullptr;
237  }
238  Predecessors.clear();
239  Successors.clear();
240  CoverageTracker.clear();
241 }
242 
243 #ifndef NDEBUG
244 /// Print the weight of edge \p E on stream \p OS.
245 ///
246 /// \param OS Stream to emit the output to.
247 /// \param E Edge to print.
248 template <typename BT>
250  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
251  << "]: " << EdgeWeights[E] << "\n";
252 }
253 
254 /// Print the equivalence class of block \p BB on stream \p OS.
255 ///
256 /// \param OS Stream to emit the output to.
257 /// \param BB Block to print.
258 template <typename BT>
260  raw_ostream &OS, const BasicBlockT *BB) {
261  const BasicBlockT *Equiv = EquivalenceClass[BB];
262  OS << "equivalence[" << BB->getName()
263  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
264 }
265 
266 /// Print the weight of block \p BB on stream \p OS.
267 ///
268 /// \param OS Stream to emit the output to.
269 /// \param BB Block to print.
270 template <typename BT>
272  raw_ostream &OS, const BasicBlockT *BB) const {
273  const auto &I = BlockWeights.find(BB);
274  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
275  OS << "weight[" << BB->getName() << "]: " << W << "\n";
276 }
277 #endif
278 
279 /// Get the weight for an instruction.
280 ///
281 /// The "weight" of an instruction \p Inst is the number of samples
282 /// collected on that instruction at runtime. To retrieve it, we
283 /// need to compute the line number of \p Inst relative to the start of its
284 /// function. We use HeaderLineno to compute the offset. We then
285 /// look up the samples collected for \p Inst using BodySamples.
286 ///
287 /// \param Inst Instruction to query.
288 ///
289 /// \returns the weight of \p Inst.
290 template <typename BT>
293  return getInstWeightImpl(Inst);
294 }
295 
296 template <typename BT>
299  const FunctionSamples *FS = findFunctionSamples(Inst);
300  if (!FS)
301  return std::error_code();
302 
303  const DebugLoc &DLoc = Inst.getDebugLoc();
304  if (!DLoc)
305  return std::error_code();
306 
307  const DILocation *DIL = DLoc;
308  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
309  uint32_t Discriminator;
311  Discriminator = DIL->getDiscriminator();
312  else
313  Discriminator = DIL->getBaseDiscriminator();
314 
315  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
316  if (R) {
317  bool FirstMark =
318  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
319  if (FirstMark) {
320  ORE->emit([&]() {
321  OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
322  Remark << "Applied " << ore::NV("NumSamples", *R);
323  Remark << " samples from profile (offset: ";
324  Remark << ore::NV("LineOffset", LineOffset);
325  if (Discriminator) {
326  Remark << ".";
327  Remark << ore::NV("Discriminator", Discriminator);
328  }
329  Remark << ")";
330  return Remark;
331  });
332  }
333  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":"
334  << Inst << " (line offset: " << LineOffset << "."
335  << Discriminator << " - weight: " << R.get() << ")\n");
336  }
337  return R;
338 }
339 
340 /// Compute the weight of a basic block.
341 ///
342 /// The weight of basic block \p BB is the maximum weight of all the
343 /// instructions in BB.
344 ///
345 /// \param BB The basic block to query.
346 ///
347 /// \returns the weight for \p BB.
348 template <typename BT>
351  uint64_t Max = 0;
352  bool HasWeight = false;
353  for (auto &I : *BB) {
354  const ErrorOr<uint64_t> &R = getInstWeight(I);
355  if (R) {
356  Max = std::max(Max, R.get());
357  HasWeight = true;
358  }
359  }
360  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
361 }
362 
363 /// Compute and store the weights of every basic block.
364 ///
365 /// This populates the BlockWeights map by computing
366 /// the weights of every basic block in the CFG.
367 ///
368 /// \param F The function to query.
369 template <typename BT>
371  bool Changed = false;
372  LLVM_DEBUG(dbgs() << "Block weights\n");
373  for (const auto &BB : F) {
374  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
375  if (Weight) {
376  BlockWeights[&BB] = Weight.get();
377  VisitedBlocks.insert(&BB);
378  Changed = true;
379  }
380  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
381  }
382 
383  return Changed;
384 }
385 
386 /// Get the FunctionSamples for an instruction.
387 ///
388 /// The FunctionSamples of an instruction \p Inst is the inlined instance
389 /// in which that instruction is coming from. We traverse the inline stack
390 /// of that instruction, and match it with the tree nodes in the profile.
391 ///
392 /// \param Inst Instruction to query.
393 ///
394 /// \returns the FunctionSamples pointer to the inlined instance.
395 template <typename BT>
397  const InstructionT &Inst) const {
398  const DILocation *DIL = Inst.getDebugLoc();
399  if (!DIL)
400  return Samples;
401 
402  auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
403  if (it.second) {
404  it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
405  }
406  return it.first->second;
407 }
408 
409 /// Find equivalence classes for the given block.
410 ///
411 /// This finds all the blocks that are guaranteed to execute the same
412 /// number of times as \p BB1. To do this, it traverses all the
413 /// descendants of \p BB1 in the dominator or post-dominator tree.
414 ///
415 /// A block BB2 will be in the same equivalence class as \p BB1 if
416 /// the following holds:
417 ///
418 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
419 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
420 /// dominate BB1 in the post-dominator tree.
421 ///
422 /// 2- Both BB2 and \p BB1 must be in the same loop.
423 ///
424 /// For every block BB2 that meets those two requirements, we set BB2's
425 /// equivalence class to \p BB1.
426 ///
427 /// \param BB1 Block to check.
428 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
429 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
430 /// with blocks from \p BB1's dominator tree, then
431 /// this is the post-dominator tree, and vice versa.
432 template <typename BT>
434  BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
435  PostDominatorTreeT *DomTree) {
436  const BasicBlockT *EC = EquivalenceClass[BB1];
437  uint64_t Weight = BlockWeights[EC];
438  for (const auto *BB2 : Descendants) {
439  bool IsDomParent = DomTree->dominates(BB2, BB1);
440  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
441  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
442  EquivalenceClass[BB2] = EC;
443  // If BB2 is visited, then the entire EC should be marked as visited.
444  if (VisitedBlocks.count(BB2)) {
445  VisitedBlocks.insert(EC);
446  }
447 
448  // If BB2 is heavier than BB1, make BB2 have the same weight
449  // as BB1.
450  //
451  // Note that we don't worry about the opposite situation here
452  // (when BB2 is lighter than BB1). We will deal with this
453  // during the propagation phase. Right now, we just want to
454  // make sure that BB1 has the largest weight of all the
455  // members of its equivalence set.
456  Weight = std::max(Weight, BlockWeights[BB2]);
457  }
458  }
459  const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
460  if (EC == EntryBB) {
461  BlockWeights[EC] = Samples->getHeadSamples() + 1;
462  } else {
463  BlockWeights[EC] = Weight;
464  }
465 }
466 
467 /// Find equivalence classes.
468 ///
469 /// Since samples may be missing from blocks, we can fill in the gaps by setting
470 /// the weights of all the blocks in the same equivalence class to the same
471 /// weight. To compute the concept of equivalence, we use dominance and loop
472 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
473 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
474 ///
475 /// \param F The function to query.
476 template <typename BT>
478  SmallVector<BasicBlockT *, 8> DominatedBBs;
479  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
480  // Find equivalence sets based on dominance and post-dominance information.
481  for (auto &BB : F) {
482  BasicBlockT *BB1 = &BB;
483 
484  // Compute BB1's equivalence class once.
485  if (EquivalenceClass.count(BB1)) {
486  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
487  continue;
488  }
489 
490  // By default, blocks are in their own equivalence class.
491  EquivalenceClass[BB1] = BB1;
492 
493  // Traverse all the blocks dominated by BB1. We are looking for
494  // every basic block BB2 such that:
495  //
496  // 1- BB1 dominates BB2.
497  // 2- BB2 post-dominates BB1.
498  // 3- BB1 and BB2 are in the same loop nest.
499  //
500  // If all those conditions hold, it means that BB2 is executed
501  // as many times as BB1, so they are placed in the same equivalence
502  // class by making BB2's equivalence class be BB1.
503  DominatedBBs.clear();
504  DT->getDescendants(BB1, DominatedBBs);
505  findEquivalencesFor(BB1, DominatedBBs, &*PDT);
506 
507  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
508  }
509 
510  // Assign weights to equivalence classes.
511  //
512  // All the basic blocks in the same equivalence class will execute
513  // the same number of times. Since we know that the head block in
514  // each equivalence class has the largest weight, assign that weight
515  // to all the blocks in that equivalence class.
516  LLVM_DEBUG(
517  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
518  for (auto &BI : F) {
519  const BasicBlockT *BB = &BI;
520  const BasicBlockT *EquivBB = EquivalenceClass[BB];
521  if (BB != EquivBB)
522  BlockWeights[BB] = BlockWeights[EquivBB];
523  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
524  }
525 }
526 
527 /// Visit the given edge to decide if it has a valid weight.
528 ///
529 /// If \p E has not been visited before, we copy to \p UnknownEdge
530 /// and increment the count of unknown edges.
531 ///
532 /// \param E Edge to visit.
533 /// \param NumUnknownEdges Current number of unknown edges.
534 /// \param UnknownEdge Set if E has not been visited before.
535 ///
536 /// \returns E's weight, if known. Otherwise, return 0.
537 template <typename BT>
539  unsigned *NumUnknownEdges,
540  Edge *UnknownEdge) {
541  if (!VisitedEdges.count(E)) {
542  (*NumUnknownEdges)++;
543  *UnknownEdge = E;
544  return 0;
545  }
546 
547  return EdgeWeights[E];
548 }
549 
550 /// Propagate weights through incoming/outgoing edges.
551 ///
552 /// If the weight of a basic block is known, and there is only one edge
553 /// with an unknown weight, we can calculate the weight of that edge.
554 ///
555 /// Similarly, if all the edges have a known count, we can calculate the
556 /// count of the basic block, if needed.
557 ///
558 /// \param F Function to process.
559 /// \param UpdateBlockCount Whether we should update basic block counts that
560 /// has already been annotated.
561 ///
562 /// \returns True if new weights were assigned to edges or blocks.
563 template <typename BT>
565  FunctionT &F, bool UpdateBlockCount) {
566  bool Changed = false;
567  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
568  for (const auto &BI : F) {
569  const BasicBlockT *BB = &BI;
570  const BasicBlockT *EC = EquivalenceClass[BB];
571 
572  // Visit all the predecessor and successor edges to determine
573  // which ones have a weight assigned already. Note that it doesn't
574  // matter that we only keep track of a single unknown edge. The
575  // only case we are interested in handling is when only a single
576  // edge is unknown (see setEdgeOrBlockWeight).
577  for (unsigned i = 0; i < 2; i++) {
578  uint64_t TotalWeight = 0;
579  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
580  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
581 
582  if (i == 0) {
583  // First, visit all predecessor edges.
584  NumTotalEdges = Predecessors[BB].size();
585  for (auto *Pred : Predecessors[BB]) {
586  Edge E = std::make_pair(Pred, BB);
587  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
588  if (E.first == E.second)
589  SelfReferentialEdge = E;
590  }
591  if (NumTotalEdges == 1) {
592  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
593  }
594  } else {
595  // On the second round, visit all successor edges.
596  NumTotalEdges = Successors[BB].size();
597  for (auto *Succ : Successors[BB]) {
598  Edge E = std::make_pair(BB, Succ);
599  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
600  }
601  if (NumTotalEdges == 1) {
602  SingleEdge = std::make_pair(BB, Successors[BB][0]);
603  }
604  }
605 
606  // After visiting all the edges, there are three cases that we
607  // can handle immediately:
608  //
609  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
610  // In this case, we simply check that the sum of all the edges
611  // is the same as BB's weight. If not, we change BB's weight
612  // to match. Additionally, if BB had not been visited before,
613  // we mark it visited.
614  //
615  // - Only one edge is unknown and BB has already been visited.
616  // In this case, we can compute the weight of the edge by
617  // subtracting the total block weight from all the known
618  // edge weights. If the edges weight more than BB, then the
619  // edge of the last remaining edge is set to zero.
620  //
621  // - There exists a self-referential edge and the weight of BB is
622  // known. In this case, this edge can be based on BB's weight.
623  // We add up all the other known edges and set the weight on
624  // the self-referential edge as we did in the previous case.
625  //
626  // In any other case, we must continue iterating. Eventually,
627  // all edges will get a weight, or iteration will stop when
628  // it reaches SampleProfileMaxPropagateIterations.
629  if (NumUnknownEdges <= 1) {
630  uint64_t &BBWeight = BlockWeights[EC];
631  if (NumUnknownEdges == 0) {
632  if (!VisitedBlocks.count(EC)) {
633  // If we already know the weight of all edges, the weight of the
634  // basic block can be computed. It should be no larger than the sum
635  // of all edge weights.
636  if (TotalWeight > BBWeight) {
637  BBWeight = TotalWeight;
638  Changed = true;
639  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
640  << " known. Set weight for block: ";
641  printBlockWeight(dbgs(), BB););
642  }
643  } else if (NumTotalEdges == 1 &&
644  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
645  // If there is only one edge for the visited basic block, use the
646  // block weight to adjust edge weight if edge weight is smaller.
647  EdgeWeights[SingleEdge] = BlockWeights[EC];
648  Changed = true;
649  }
650  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
651  // If there is a single unknown edge and the block has been
652  // visited, then we can compute E's weight.
653  if (BBWeight >= TotalWeight)
654  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
655  else
656  EdgeWeights[UnknownEdge] = 0;
657  const BasicBlockT *OtherEC;
658  if (i == 0)
659  OtherEC = EquivalenceClass[UnknownEdge.first];
660  else
661  OtherEC = EquivalenceClass[UnknownEdge.second];
662  // Edge weights should never exceed the BB weights it connects.
663  if (VisitedBlocks.count(OtherEC) &&
664  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
665  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
666  VisitedEdges.insert(UnknownEdge);
667  Changed = true;
668  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
669  printEdgeWeight(dbgs(), UnknownEdge));
670  }
671  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
672  // If a block Weights 0, all its in/out edges should weight 0.
673  if (i == 0) {
674  for (auto *Pred : Predecessors[BB]) {
675  Edge E = std::make_pair(Pred, BB);
676  EdgeWeights[E] = 0;
677  VisitedEdges.insert(E);
678  }
679  } else {
680  for (auto *Succ : Successors[BB]) {
681  Edge E = std::make_pair(BB, Succ);
682  EdgeWeights[E] = 0;
683  VisitedEdges.insert(E);
684  }
685  }
686  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
687  uint64_t &BBWeight = BlockWeights[BB];
688  // We have a self-referential edge and the weight of BB is known.
689  if (BBWeight >= TotalWeight)
690  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
691  else
692  EdgeWeights[SelfReferentialEdge] = 0;
693  VisitedEdges.insert(SelfReferentialEdge);
694  Changed = true;
695  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
696  printEdgeWeight(dbgs(), SelfReferentialEdge));
697  }
698  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
699  BlockWeights[EC] = TotalWeight;
700  VisitedBlocks.insert(EC);
701  Changed = true;
702  }
703  }
704  }
705 
706  return Changed;
707 }
708 
709 /// Build in/out edge lists for each basic block in the CFG.
710 ///
711 /// We are interested in unique edges. If a block B1 has multiple
712 /// edges to another block B2, we only add a single B1->B2 edge.
713 template <typename BT>
715  for (auto &BI : F) {
716  BasicBlockT *B1 = &BI;
717 
718  // Add predecessors for B1.
720  if (!Predecessors[B1].empty())
721  llvm_unreachable("Found a stale predecessors list in a basic block.");
722  for (auto *B2 : getPredecessors(B1))
723  if (Visited.insert(B2).second)
724  Predecessors[B1].push_back(B2);
725 
726  // Add successors for B1.
727  Visited.clear();
728  if (!Successors[B1].empty())
729  llvm_unreachable("Found a stale successors list in a basic block.");
730  for (auto *B2 : getSuccessors(B1))
731  if (Visited.insert(B2).second)
732  Successors[B1].push_back(B2);
733  }
734 }
735 
736 /// Propagate weights into edges
737 ///
738 /// The following rules are applied to every block BB in the CFG:
739 ///
740 /// - If BB has a single predecessor/successor, then the weight
741 /// of that edge is the weight of the block.
742 ///
743 /// - If all incoming or outgoing edges are known except one, and the
744 /// weight of the block is already known, the weight of the unknown
745 /// edge will be the weight of the block minus the sum of all the known
746 /// edges. If the sum of all the known edges is larger than BB's weight,
747 /// we set the unknown edge weight to zero.
748 ///
749 /// - If there is a self-referential edge, and the weight of the block is
750 /// known, the weight for that edge is set to the weight of the block
751 /// minus the weight of the other incoming edges to that block (if
752 /// known).
753 template <typename BT>
755  // Flow-based profile inference is only usable with BasicBlock instantiation
756  // of SampleProfileLoaderBaseImpl.
757  if (SampleProfileUseProfi) {
758  // Prepare block sample counts for inference.
759  BlockWeightMap SampleBlockWeights;
760  for (const auto &BI : F) {
761  ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
762  if (Weight)
763  SampleBlockWeights[&BI] = Weight.get();
764  }
765  // Fill in BlockWeights and EdgeWeights using an inference algorithm.
766  applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
767  } else {
768  bool Changed = true;
769  unsigned I = 0;
770 
771  // If BB weight is larger than its corresponding loop's header BB weight,
772  // use the BB weight to replace the loop header BB weight.
773  for (auto &BI : F) {
774  BasicBlockT *BB = &BI;
775  LoopT *L = LI->getLoopFor(BB);
776  if (!L) {
777  continue;
778  }
779  BasicBlockT *Header = L->getHeader();
780  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
781  BlockWeights[Header] = BlockWeights[BB];
782  }
783  }
784 
785  // Propagate until we converge or we go past the iteration limit.
786  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
787  Changed = propagateThroughEdges(F, false);
788  }
789 
790  // The first propagation propagates BB counts from annotated BBs to unknown
791  // BBs. The 2nd propagation pass resets edges weights, and use all BB
792  // weights to propagate edge weights.
793  VisitedEdges.clear();
794  Changed = true;
795  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
796  Changed = propagateThroughEdges(F, false);
797  }
798 
799  // The 3rd propagation pass allows adjust annotated BB weights that are
800  // obviously wrong.
801  Changed = true;
802  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
803  Changed = propagateThroughEdges(F, true);
804  }
805  }
806 }
807 
808 template <typename BT>
810  FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
811  BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
812  auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
813  Infer.apply(BlockWeights, EdgeWeights);
814 }
815 
816 /// Generate branch weight metadata for all branches in \p F.
817 ///
818 /// Branch weights are computed out of instruction samples using a
819 /// propagation heuristic. Propagation proceeds in 3 phases:
820 ///
821 /// 1- Assignment of block weights. All the basic blocks in the function
822 /// are initial assigned the same weight as their most frequently
823 /// executed instruction.
824 ///
825 /// 2- Creation of equivalence classes. Since samples may be missing from
826 /// blocks, we can fill in the gaps by setting the weights of all the
827 /// blocks in the same equivalence class to the same weight. To compute
828 /// the concept of equivalence, we use dominance and loop information.
829 /// Two blocks B1 and B2 are in the same equivalence class if B1
830 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
831 ///
832 /// 3- Propagation of block weights into edges. This uses a simple
833 /// propagation heuristic. The following rules are applied to every
834 /// block BB in the CFG:
835 ///
836 /// - If BB has a single predecessor/successor, then the weight
837 /// of that edge is the weight of the block.
838 ///
839 /// - If all the edges are known except one, and the weight of the
840 /// block is already known, the weight of the unknown edge will
841 /// be the weight of the block minus the sum of all the known
842 /// edges. If the sum of all the known edges is larger than BB's weight,
843 /// we set the unknown edge weight to zero.
844 ///
845 /// - If there is a self-referential edge, and the weight of the block is
846 /// known, the weight for that edge is set to the weight of the block
847 /// minus the weight of the other incoming edges to that block (if
848 /// known).
849 ///
850 /// Since this propagation is not guaranteed to finalize for every CFG, we
851 /// only allow it to proceed for a limited number of iterations (controlled
852 /// by -sample-profile-max-propagate-iterations).
853 ///
854 /// FIXME: Try to replace this propagation heuristic with a scheme
855 /// that is guaranteed to finalize. A work-list approach similar to
856 /// the standard value propagation algorithm used by SSA-CCP might
857 /// work here.
858 ///
859 /// \param F The function to query.
860 ///
861 /// \returns true if \p F was modified. Returns false, otherwise.
862 template <typename BT>
864  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
865  bool Changed = (InlinedGUIDs.size() != 0);
866 
867  // Compute basic block weights.
868  Changed |= computeBlockWeights(F);
869 
870  if (Changed) {
871  // Initialize propagation.
872  initWeightPropagation(F, InlinedGUIDs);
873 
874  // Propagate weights to all edges.
875  propagateWeights(F);
876 
877  // Post-process propagated weights.
878  finalizeWeightPropagation(F, InlinedGUIDs);
879  }
880 
881  return Changed;
882 }
883 
884 template <typename BT>
886  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
887  // Add an entry count to the function using the samples gathered at the
888  // function entry.
889  // Sets the GUIDs that are inlined in the profiled binary. This is used
890  // for ThinLink to make correct liveness analysis, and also make the IR
891  // match the profiled binary before annotation.
892  getFunction(F).setEntryCount(
893  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
894  &InlinedGUIDs);
895 
896  if (!SampleProfileUseProfi) {
897  // Compute dominance and loop info needed for propagation.
898  computeDominanceAndLoopInfo(F);
899 
900  // Find equivalence classes.
901  findEquivalenceClasses(F);
902  }
903 
904  // Before propagation starts, build, for each block, a list of
905  // unique predecessors and successors. This is necessary to handle
906  // identical edges in multiway branches. Since we visit all blocks and all
907  // edges of the CFG, it is cleaner to build these lists once at the start
908  // of the pass.
909  buildEdges(F);
910 }
911 
912 template <typename BT>
914  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
915  // If we utilize a flow-based count inference, then we trust the computed
916  // counts and set the entry count as computed by the algorithm. This is
917  // primarily done to sync the counts produced by profi and BFI inference,
918  // which uses the entry count for mass propagation.
919  // If profi produces a zero-value for the entry count, we fallback to
920  // Samples->getHeadSamples() + 1 to avoid functions with zero count.
921  if (SampleProfileUseProfi) {
922  const BasicBlockT *EntryBB = getEntryBB(&F);
923  if (BlockWeights[EntryBB] > 0) {
924  getFunction(F).setEntryCount(
925  ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
926  &InlinedGUIDs);
927  }
928  }
929 }
930 
931 template <typename BT>
933  // If coverage checking was requested, compute it now.
934  const Function &Func = getFunction(F);
936  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
937  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
938  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
939  if (Coverage < SampleProfileRecordCoverage) {
940  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
941  Func.getSubprogram()->getFilename(), getFunctionLoc(F),
942  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
943  Twine(Coverage) + "%) were applied",
944  DS_Warning));
945  }
946  }
947 
949  uint64_t Used = CoverageTracker.getTotalUsedSamples();
950  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
951  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
952  if (Coverage < SampleProfileSampleCoverage) {
953  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
954  Func.getSubprogram()->getFilename(), getFunctionLoc(F),
955  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
956  Twine(Coverage) + "%) were applied",
957  DS_Warning));
958  }
959  }
960 }
961 
962 /// Get the line number for the function header.
963 ///
964 /// This looks up function \p F in the current compilation unit and
965 /// retrieves the line number where the function is defined. This is
966 /// line 0 for all the samples read from the profile file. Every line
967 /// number is relative to this line.
968 ///
969 /// \param F Function object to query.
970 ///
971 /// \returns the line number where \p F is defined. If it returns 0,
972 /// it means that there is no debug information available for \p F.
973 template <typename BT>
975  const Function &Func = getFunction(F);
976  if (DISubprogram *S = Func.getSubprogram())
977  return S->getLine();
978 
979  if (NoWarnSampleUnused)
980  return 0;
981 
982  // If the start of \p F is missing, emit a diagnostic to inform the user
983  // about the missed opportunity.
984  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
985  "No debug information found in function " + Func.getName() +
986  ": Function profile not used",
987  DS_Warning));
988  return 0;
989 }
990 
991 template <typename BT>
993  FunctionT &F) {
994  DT.reset(new DominatorTree);
995  DT->recalculate(F);
996 
997  PDT.reset(new PostDominatorTree(F));
998 
999  LI.reset(new LoopInfo);
1000  LI->analyze(*DT);
1001 }
1002 
1003 #undef DEBUG_TYPE
1004 
1005 } // namespace llvm
1006 #endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
llvm::SampleProfileLoaderBaseImpl::getEntryBB
const BasicBlockT * getEntryBB(const FunctionT *F)
Definition: SampleProfileLoaderBaseImpl.h:121
i
i
Definition: README.txt:29
B1
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::OptRemarkAnalysisT
typename afdo_detail::IRTraits< MachineBasicBlock >::OptRemarkAnalysisT OptRemarkAnalysisT
Definition: SampleProfileLoaderBaseImpl.h:102
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
llvm::SampleProfileLoaderBaseImpl::getFunction
Function & getFunction(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:118
DebugInfoMetadata.h
llvm::SampleProfileLoaderBaseImpl::visitEdge
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
Definition: SampleProfileLoaderBaseImpl.h:538
llvm::Function
Definition: Function.h:62
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:235
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::SuccRangeT
typename afdo_detail::IRTraits< MachineBasicBlock >::SuccRangeT SuccRangeT
Definition: SampleProfileLoaderBaseImpl.h:104
llvm::afdo_detail::IRTraits< BasicBlock >::PostDominatorTreePtrT
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:63
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:208
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:80
llvm::SampleProfileLoaderBaseImpl::Predecessors
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:198
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::SampleProfileLoaderBaseImpl::Successors
BlockEdgeMap Successors
Successors for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:201
llvm::EnableFSDiscriminator
cl::opt< bool > EnableFSDiscriminator
Definition: TargetPassConfig.cpp:382
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
DenseMap.h
Module.h
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::afdo_detail::IRTraits< BasicBlock >::getSuccessors
static succ_range getSuccessors(BasicBlock *BB)
Definition: SampleProfileLoaderBaseImpl.h:73
llvm::SampleProfileLoaderBaseImpl::LI
LoopInfoPtrT LI
Definition: SampleProfileLoaderBaseImpl.h:195
llvm::SmallSet< Edge, 32 >
llvm::SmallPtrSet< const BasicBlockT *, 32 >
llvm::SampleProfileLoaderBaseImpl::DT
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
Definition: SampleProfileLoaderBaseImpl.h:193
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
SampleProfileInference.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::BlockFrequencyInfoT
typename afdo_detail::IRTraits< MachineBasicBlock >::BlockFrequencyInfoT BlockFrequencyInfoT
Definition: SampleProfileLoaderBaseImpl.h:89
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::afdo_detail::IRTraits< BasicBlock >::getFunction
static Function & getFunction(Function &F)
Definition: SampleProfileLoaderBaseImpl.h:68
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::SampleProfileLoaderBaseImpl::initWeightPropagation
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Definition: SampleProfileLoaderBaseImpl.h:885
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::SampleProfileLoaderBaseImpl::getBlockWeight
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
Definition: SampleProfileLoaderBaseImpl.h:350
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::SampleProfileLoaderBaseImpl::propagateThroughEdges
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
Definition: SampleProfileLoaderBaseImpl.h:564
llvm::SampleProfileLoaderBaseImpl::printBlockEquivalence
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:259
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:47
llvm::afdo_detail::IRTraits< BasicBlock >::getEntryBB
static const BasicBlock * getEntryBB(const Function *F)
Definition: SampleProfileLoaderBaseImpl.h:69
llvm::SampleProfileLoaderBaseImpl::BlockWeights
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
Definition: SampleProfileLoaderBaseImpl.h:170
DenseSet.h
SampleProf.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::Instruction
Definition: Instruction.h:45
llvm::SampleProfileLoaderBaseImpl::Filename
std::string Filename
Name of the profile file to load.
Definition: SampleProfileLoaderBaseImpl.h:213
llvm::SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo
void computeDominanceAndLoopInfo(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:992
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SampleProfileLoaderBaseImpl::printEdgeWeight
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:249
DebugLoc.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::LoopInfoPtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::LoopInfoPtrT LoopInfoPtrT
Definition: SampleProfileLoaderBaseImpl.h:92
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:250
SampleProfileLoaderBaseUtil.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PostDominatorTreeT
typename afdo_detail::IRTraits< MachineBasicBlock >::PostDominatorTreeT PostDominatorTreeT
Definition: SampleProfileLoaderBaseImpl.h:98
llvm::SampleProfileLoaderBaseImpl::findFunctionSamples
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
Definition: SampleProfileLoaderBaseImpl.h:396
llvm::SampleProfileLoaderBaseImpl::getInstWeight
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
Definition: SampleProfileLoaderBaseImpl.h:292
llvm::SampleProfileLoaderBaseImpl::Reader
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
Definition: SampleProfileLoaderBaseImpl.h:207
CFG.h
LoopInfo.h
llvm::SampleProfileLoaderBaseImpl::emitCoverageRemarks
void emitCoverageRemarks(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:932
llvm::SampleProfileLoaderBaseImpl::SampleProfileLoaderBaseImpl
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
Definition: SampleProfileLoaderBaseImpl.h:82
llvm::SampleProfileLoaderBaseImpl::clearFunctionData
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
Definition: SampleProfileLoaderBaseImpl.h:227
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PredRangeT
typename afdo_detail::IRTraits< MachineBasicBlock >::PredRangeT PredRangeT
Definition: SampleProfileLoaderBaseImpl.h:103
llvm::SampleProfileLoaderBaseImpl::getFunctionLoc
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
Definition: SampleProfileLoaderBaseImpl.h:974
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::SampleProfileLoaderBaseImpl::computeAndPropagateWeights
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
Definition: SampleProfileLoaderBaseImpl.h:863
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::SampleProfileLoaderBaseImpl::computeBlockWeights
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
Definition: SampleProfileLoaderBaseImpl.h:370
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::DominatorTreePtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::DominatorTreePtrT DominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:94
llvm::succ_range
iterator_range< succ_iterator > succ_range
Definition: CFG.h:245
llvm::SampleProfileLoaderBaseImpl::PDT
PostDominatorTreePtrT PDT
Definition: SampleProfileLoaderBaseImpl.h:194
uint64_t
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:689
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::InstructionT
typename afdo_detail::IRTraits< MachineBasicBlock >::InstructionT InstructionT
Definition: SampleProfileLoaderBaseImpl.h:86
llvm::SampleProfileLoaderBaseImpl::CoverageTracker
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
Definition: SampleProfileLoaderBaseImpl.h:204
llvm::SampleProfileLoaderBaseImpl::getInstWeightImpl
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
Definition: SampleProfileLoaderBaseImpl.h:298
llvm::afdo_detail::IRTraits< BasicBlock >::getPredecessors
static pred_range getPredecessors(BasicBlock *BB)
Definition: SampleProfileLoaderBaseImpl.h:72
llvm::DenseMap< const BasicBlockT *, uint64_t >
I
#define I(x, y, z)
Definition: MD5.cpp:58
SampleProfReader.h
llvm::SampleProfileLoaderBaseImpl::findEquivalenceClasses
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
Definition: SampleProfileLoaderBaseImpl.h:477
ArrayRef.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfileLoaderBaseImpl.h:49
llvm::SampleProfileLoaderBaseImpl::EquivalenceClass
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
Definition: SampleProfileLoaderBaseImpl.h:190
llvm::SampleProfileLoaderBaseImpl::VisitedBlocks
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
Definition: SampleProfileLoaderBaseImpl.h:179
llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PostDominatorTreePtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::PostDominatorTreePtrT PostDominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:96
llvm::SampleProfileLoaderBaseImpl::buildEdges
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:714
llvm::sampleprof::FunctionSamples::getOffset
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:223
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:286
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileSampleCoverage
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::LoopT
typename afdo_detail::IRTraits< MachineBasicBlock >::LoopT LoopT
Definition: SampleProfileLoaderBaseImpl.h:91
llvm::SampleProfileLoaderBaseImpl::RemappingFilename
std::string RemappingFilename
Name of the profile remapping file to load.
Definition: SampleProfileLoaderBaseImpl.h:216
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::BasicBlockT
typename afdo_detail::IRTraits< MachineBasicBlock >::BasicBlockT BasicBlockT
Definition: SampleProfileLoaderBaseImpl.h:87
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::SampleProfileLoaderBaseImpl::applyProfi
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
Definition: SampleProfileLoaderBaseImpl.h:809
llvm::SampleProfileLoaderBaseImpl::propagateWeights
void propagateWeights(FunctionT &F)
Propagate weights into edges.
Definition: SampleProfileLoaderBaseImpl.h:754
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::SampleProfileLoaderBaseImpl::printBlockWeight
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:271
llvm::SampleProfileLoaderBaseImpl::findEquivalencesFor
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
Definition: SampleProfileLoaderBaseImpl.h:433
llvm::SampleProfileLoaderBaseImpl::dump
void dump()
Definition: SampleProfileLoaderBaseImpl.h:84
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:309
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
GenericDomTree.h
llvm::SampleProfileLoaderBaseImpl::VisitedEdges
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
Definition: SampleProfileLoaderBaseImpl.h:182
Function.h
llvm::afdo_detail::IRTraits< BasicBlock >::DominatorTreePtrT
std::unique_ptr< DominatorTree > DominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:61
llvm::sampleprofutil::SampleCoverageTracker
Definition: SampleProfileLoaderBaseUtil.h:41
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:581
llvm::SampleProfileLoaderBaseImpl::finalizeWeightPropagation
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Definition: SampleProfileLoaderBaseImpl.h:913
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:25
llvm::SampleProfileLoaderBaseImpl::getPredecessors
PredRangeT getPredecessors(BasicBlockT *BB)
Definition: SampleProfileLoaderBaseImpl.h:124
llvm::DILocation::getBaseDiscriminator
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Definition: DebugInfoMetadata.h:2239
Instructions.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::Edge
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
Definition: SampleProfileLoaderBaseImpl.h:109
llvm::SampleProfileInference
Sample profile inference pass.
Definition: SampleProfileInference.h:84
SmallVector.h
llvm::pred_range
iterator_range< pred_iterator > pred_range
Definition: CFG.h:108
llvm::SampleProfileLoaderBaseImpl::getSuccessors
SuccRangeT getSuccessors(BasicBlockT *BB)
Definition: SampleProfileLoaderBaseImpl.h:127
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
llvm::afdo_detail::IRTraits
Definition: SampleProfileLoaderBaseImpl.h:53
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SampleProfileMaxPropagateIterations
cl::opt< unsigned > SampleProfileMaxPropagateIterations
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::SampleProfileLoaderBaseImpl::EdgeWeights
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
Definition: SampleProfileLoaderBaseImpl.h:176
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::FunctionT
typename afdo_detail::IRTraits< MachineBasicBlock >::FunctionT FunctionT
Definition: SampleProfileLoaderBaseImpl.h:90
llvm::DISubprogram
Subprogram description.
Definition: DebugInfoMetadata.h:1826
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::OptRemarkEmitterT
typename afdo_detail::IRTraits< MachineBasicBlock >::OptRemarkEmitterT OptRemarkEmitterT
Definition: SampleProfileLoaderBaseImpl.h:100
raw_ostream.h
llvm::afdo_detail::IRTraits< BasicBlock >::LoopInfoPtrT
std::unique_ptr< LoopInfo > LoopInfoPtrT
Definition: SampleProfileLoaderBaseImpl.h:60
llvm::SampleProfileLoaderBaseImpl::DILocation2SampleMap
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
Definition: SampleProfileLoaderBaseImpl.h:136
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::NoWarnSampleUnused
cl::opt< bool > NoWarnSampleUnused