LLVM  9.0.0svn
BlockFrequencyInfoImpl.h
Go to the documentation of this file.
1 //==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Shared implementation of BlockFrequency for IR and Machine Instructions.
10 // See the documentation below for BlockFrequencyInfoImpl for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
15 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/GraphTraits.h"
20 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/Format.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstddef>
38 #include <cstdint>
39 #include <deque>
40 #include <iterator>
41 #include <limits>
42 #include <list>
43 #include <string>
44 #include <utility>
45 #include <vector>
46 
47 #define DEBUG_TYPE "block-freq"
48 
49 namespace llvm {
50 
51 class BranchProbabilityInfo;
52 class Function;
53 class Loop;
54 class LoopInfo;
55 class MachineBasicBlock;
56 class MachineBranchProbabilityInfo;
57 class MachineFunction;
58 class MachineLoop;
59 class MachineLoopInfo;
60 
61 namespace bfi_detail {
62 
63 struct IrreducibleGraph;
64 
65 // This is part of a workaround for a GCC 4.7 crash on lambdas.
66 template <class BT> struct BlockEdgesAdder;
67 
68 /// Mass of a block.
69 ///
70 /// This class implements a sort of fixed-point fraction always between 0.0 and
71 /// 1.0. getMass() == std::numeric_limits<uint64_t>::max() indicates a value of
72 /// 1.0.
73 ///
74 /// Masses can be added and subtracted. Simple saturation arithmetic is used,
75 /// so arithmetic operations never overflow or underflow.
76 ///
77 /// Masses can be multiplied. Multiplication treats full mass as 1.0 and uses
78 /// an inexpensive floating-point algorithm that's off-by-one (almost, but not
79 /// quite, maximum precision).
80 ///
81 /// Masses can be scaled by \a BranchProbability at maximum precision.
82 class BlockMass {
83  uint64_t Mass = 0;
84 
85 public:
86  BlockMass() = default;
87  explicit BlockMass(uint64_t Mass) : Mass(Mass) {}
88 
89  static BlockMass getEmpty() { return BlockMass(); }
90 
91  static BlockMass getFull() {
93  }
94 
95  uint64_t getMass() const { return Mass; }
96 
97  bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); }
98  bool isEmpty() const { return !Mass; }
99 
100  bool operator!() const { return isEmpty(); }
101 
102  /// Add another mass.
103  ///
104  /// Adds another mass, saturating at \a isFull() rather than overflowing.
106  uint64_t Sum = Mass + X.Mass;
108  return *this;
109  }
110 
111  /// Subtract another mass.
112  ///
113  /// Subtracts another mass, saturating at \a isEmpty() rather than
114  /// undeflowing.
116  uint64_t Diff = Mass - X.Mass;
117  Mass = Diff > Mass ? 0 : Diff;
118  return *this;
119  }
120 
122  Mass = P.scale(Mass);
123  return *this;
124  }
125 
126  bool operator==(BlockMass X) const { return Mass == X.Mass; }
127  bool operator!=(BlockMass X) const { return Mass != X.Mass; }
128  bool operator<=(BlockMass X) const { return Mass <= X.Mass; }
129  bool operator>=(BlockMass X) const { return Mass >= X.Mass; }
130  bool operator<(BlockMass X) const { return Mass < X.Mass; }
131  bool operator>(BlockMass X) const { return Mass > X.Mass; }
132 
133  /// Convert to scaled number.
134  ///
135  /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty()
136  /// gives slightly above 0.0.
138 
139  void dump() const;
140  raw_ostream &print(raw_ostream &OS) const;
141 };
142 
144  return BlockMass(L) += R;
145 }
147  return BlockMass(L) -= R;
148 }
150  return BlockMass(L) *= R;
151 }
153  return BlockMass(R) *= L;
154 }
155 
157  return X.print(OS);
158 }
159 
160 } // end namespace bfi_detail
161 
162 template <> struct isPodLike<bfi_detail::BlockMass> {
163  static const bool value = true;
164 };
165 
166 /// Base class for BlockFrequencyInfoImpl
167 ///
168 /// BlockFrequencyInfoImplBase has supporting data structures and some
169 /// algorithms for BlockFrequencyInfoImplBase. Only algorithms that depend on
170 /// the block type (or that call such algorithms) are skipped here.
171 ///
172 /// Nevertheless, the majority of the overall algorithm documention lives with
173 /// BlockFrequencyInfoImpl. See there for details.
175 public:
178 
179  /// Representative of a block.
180  ///
181  /// This is a simple wrapper around an index into the reverse-post-order
182  /// traversal of the blocks.
183  ///
184  /// Unlike a block pointer, its order has meaning (location in the
185  /// topological sort) and it's class is the same regardless of block type.
186  struct BlockNode {
188 
190 
191  BlockNode() = default;
192  BlockNode(IndexType Index) : Index(Index) {}
193 
194  bool operator==(const BlockNode &X) const { return Index == X.Index; }
195  bool operator!=(const BlockNode &X) const { return Index != X.Index; }
196  bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
197  bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
198  bool operator<(const BlockNode &X) const { return Index < X.Index; }
199  bool operator>(const BlockNode &X) const { return Index > X.Index; }
200 
201  bool isValid() const { return Index <= getMaxIndex(); }
202 
203  static size_t getMaxIndex() {
205  }
206  };
207 
208  /// Stats about a block itself.
209  struct FrequencyData {
211  uint64_t Integer;
212  };
213 
214  /// Data about a loop.
215  ///
216  /// Contains the data necessary to represent a loop as a pseudo-node once it's
217  /// packaged.
218  struct LoopData {
222 
223  LoopData *Parent; ///< The parent loop.
224  bool IsPackaged = false; ///< Whether this has been packaged.
225  uint32_t NumHeaders = 1; ///< Number of headers.
226  ExitMap Exits; ///< Successor edges (and weights).
227  NodeList Nodes; ///< Header and the members of the loop.
228  HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
231 
232  LoopData(LoopData *Parent, const BlockNode &Header)
233  : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {}
234 
235  template <class It1, class It2>
236  LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
237  It2 LastOther)
238  : Parent(Parent), Nodes(FirstHeader, LastHeader) {
239  NumHeaders = Nodes.size();
240  Nodes.insert(Nodes.end(), FirstOther, LastOther);
241  BackedgeMass.resize(NumHeaders);
242  }
243 
244  bool isHeader(const BlockNode &Node) const {
245  if (isIrreducible())
246  return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders,
247  Node);
248  return Node == Nodes[0];
249  }
250 
251  BlockNode getHeader() const { return Nodes[0]; }
252  bool isIrreducible() const { return NumHeaders > 1; }
253 
255  assert(isHeader(B) && "this is only valid on loop header blocks");
256  if (isIrreducible())
257  return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
258  Nodes.begin();
259  return 0;
260  }
261 
263  return Nodes.begin() + NumHeaders;
264  }
265 
266  NodeList::const_iterator members_end() const { return Nodes.end(); }
268  return make_range(members_begin(), members_end());
269  }
270  };
271 
272  /// Index of loop information.
273  struct WorkingData {
274  BlockNode Node; ///< This node.
275  LoopData *Loop = nullptr; ///< The loop this block is inside.
276  BlockMass Mass; ///< Mass distribution from the entry block.
277 
278  WorkingData(const BlockNode &Node) : Node(Node) {}
279 
280  bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
281 
282  bool isDoubleLoopHeader() const {
283  return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() &&
284  Loop->Parent->isHeader(Node);
285  }
286 
288  if (!isLoopHeader())
289  return Loop;
290  if (!isDoubleLoopHeader())
291  return Loop->Parent;
292  return Loop->Parent->Parent;
293  }
294 
295  /// Resolve a node to its representative.
296  ///
297  /// Get the node currently representing Node, which could be a containing
298  /// loop.
299  ///
300  /// This function should only be called when distributing mass. As long as
301  /// there are no irreducible edges to Node, then it will have complexity
302  /// O(1) in this context.
303  ///
304  /// In general, the complexity is O(L), where L is the number of loop
305  /// headers Node has been packaged into. Since this method is called in
306  /// the context of distributing mass, L will be the number of loop headers
307  /// an early exit edge jumps out of.
309  auto L = getPackagedLoop();
310  return L ? L->getHeader() : Node;
311  }
312 
314  if (!Loop || !Loop->IsPackaged)
315  return nullptr;
316  auto L = Loop;
317  while (L->Parent && L->Parent->IsPackaged)
318  L = L->Parent;
319  return L;
320  }
321 
322  /// Get the appropriate mass for a node.
323  ///
324  /// Get appropriate mass for Node. If Node is a loop-header (whose loop
325  /// has been packaged), returns the mass of its pseudo-node. If it's a
326  /// node inside a packaged loop, it returns the loop's mass.
328  if (!isAPackage())
329  return Mass;
330  if (!isADoublePackage())
331  return Loop->Mass;
332  return Loop->Parent->Mass;
333  }
334 
335  /// Has ContainingLoop been packaged up?
336  bool isPackaged() const { return getResolvedNode() != Node; }
337 
338  /// Has Loop been packaged up?
339  bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
340 
341  /// Has Loop been packaged up twice?
342  bool isADoublePackage() const {
343  return isDoubleLoopHeader() && Loop->Parent->IsPackaged;
344  }
345  };
346 
347  /// Unscaled probability weight.
348  ///
349  /// Probability weight for an edge in the graph (including the
350  /// successor/target node).
351  ///
352  /// All edges in the original function are 32-bit. However, exit edges from
353  /// loop packages are taken from 64-bit exit masses, so we need 64-bits of
354  /// space in general.
355  ///
356  /// In addition to the raw weight amount, Weight stores the type of the edge
357  /// in the current context (i.e., the context of the loop being processed).
358  /// Is this a local edge within the loop, an exit from the loop, or a
359  /// backedge to the loop header?
360  struct Weight {
361  enum DistType { Local, Exit, Backedge };
362  DistType Type = Local;
364  uint64_t Amount = 0;
365 
366  Weight() = default;
367  Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
368  : Type(Type), TargetNode(TargetNode), Amount(Amount) {}
369  };
370 
371  /// Distribution of unscaled probability weight.
372  ///
373  /// Distribution of unscaled probability weight to a set of successors.
374  ///
375  /// This class collates the successor edge weights for later processing.
376  ///
377  /// \a DidOverflow indicates whether \a Total did overflow while adding to
378  /// the distribution. It should never overflow twice.
379  struct Distribution {
381 
382  WeightList Weights; ///< Individual successor weights.
383  uint64_t Total = 0; ///< Sum of all weights.
384  bool DidOverflow = false; ///< Whether \a Total did overflow.
385 
386  Distribution() = default;
387 
388  void addLocal(const BlockNode &Node, uint64_t Amount) {
389  add(Node, Amount, Weight::Local);
390  }
391 
392  void addExit(const BlockNode &Node, uint64_t Amount) {
393  add(Node, Amount, Weight::Exit);
394  }
395 
396  void addBackedge(const BlockNode &Node, uint64_t Amount) {
397  add(Node, Amount, Weight::Backedge);
398  }
399 
400  /// Normalize the distribution.
401  ///
402  /// Combines multiple edges to the same \a Weight::TargetNode and scales
403  /// down so that \a Total fits into 32-bits.
404  ///
405  /// This is linear in the size of \a Weights. For the vast majority of
406  /// cases, adjacent edge weights are combined by sorting WeightList and
407  /// combining adjacent weights. However, for very large edge lists an
408  /// auxiliary hash table is used.
409  void normalize();
410 
411  private:
412  void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type);
413  };
414 
415  /// Data about each block. This is used downstream.
416  std::vector<FrequencyData> Freqs;
417 
418  /// Whether each block is an irreducible loop header.
419  /// This is used downstream.
421 
422  /// Loop data: see initializeLoops().
423  std::vector<WorkingData> Working;
424 
425  /// Indexed information about loops.
426  std::list<LoopData> Loops;
427 
428  /// Virtual destructor.
429  ///
430  /// Need a virtual destructor to mask the compiler warning about
431  /// getBlockName().
432  virtual ~BlockFrequencyInfoImplBase() = default;
433 
434  /// Add all edges out of a packaged loop to the distribution.
435  ///
436  /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each
437  /// successor edge.
438  ///
439  /// \return \c true unless there's an irreducible backedge.
440  bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop,
441  Distribution &Dist);
442 
443  /// Add an edge to the distribution.
444  ///
445  /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the
446  /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
447  /// every edge should be a local edge (since all the loops are packaged up).
448  ///
449  /// \return \c true unless aborted due to an irreducible backedge.
450  bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
451  const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
452 
454  assert(Head.Index < Working.size());
455  assert(Working[Head.Index].isLoopHeader());
456  return *Working[Head.Index].Loop;
457  }
458 
459  /// Analyze irreducible SCCs.
460  ///
461  /// Separate irreducible SCCs from \c G, which is an explict graph of \c
462  /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr).
463  /// Insert them into \a Loops before \c Insert.
464  ///
465  /// \return the \c LoopData nodes representing the irreducible SCCs.
467  analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop,
468  std::list<LoopData>::iterator Insert);
469 
470  /// Update a loop after packaging irreducible SCCs inside of it.
471  ///
472  /// Update \c OuterLoop. Before finding irreducible control flow, it was
473  /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a
474  /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged
475  /// up need to be removed from \a OuterLoop::Nodes.
476  void updateLoopWithIrreducible(LoopData &OuterLoop);
477 
478  /// Distribute mass according to a distribution.
479  ///
480  /// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
481  /// backedges and exits are stored in its entry in Loops.
482  ///
483  /// Mass is distributed in parallel from two copies of the source mass.
484  void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
485  Distribution &Dist);
486 
487  /// Compute the loop scale for a loop.
488  void computeLoopScale(LoopData &Loop);
489 
490  /// Adjust the mass of all headers in an irreducible loop.
491  ///
492  /// Initially, irreducible loops are assumed to distribute their mass
493  /// equally among its headers. This can lead to wrong frequency estimates
494  /// since some headers may be executed more frequently than others.
495  ///
496  /// This adjusts header mass distribution so it matches the weights of
497  /// the backedges going into each of the loop headers.
498  void adjustLoopHeaderMass(LoopData &Loop);
499 
500  void distributeIrrLoopHeaderMass(Distribution &Dist);
501 
502  /// Package up a loop.
503  void packageLoop(LoopData &Loop);
504 
505  /// Unwrap loops.
506  void unwrapLoops();
507 
508  /// Finalize frequency metrics.
509  ///
510  /// Calculates final frequencies and cleans up no-longer-needed data
511  /// structures.
512  void finalizeMetrics();
513 
514  /// Clear all memory.
515  void clear();
516 
517  virtual std::string getBlockName(const BlockNode &Node) const;
518  std::string getLoopName(const LoopData &Loop) const;
519 
520  virtual raw_ostream &print(raw_ostream &OS) const { return OS; }
521  void dump() const { print(dbgs()); }
522 
523  Scaled64 getFloatingBlockFreq(const BlockNode &Node) const;
524 
525  BlockFrequency getBlockFreq(const BlockNode &Node) const;
526  Optional<uint64_t> getBlockProfileCount(const Function &F,
527  const BlockNode &Node) const;
528  Optional<uint64_t> getProfileCountFromFreq(const Function &F,
529  uint64_t Freq) const;
530  bool isIrrLoopHeader(const BlockNode &Node);
531 
532  void setBlockFreq(const BlockNode &Node, uint64_t Freq);
533 
534  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const;
535  raw_ostream &printBlockFreq(raw_ostream &OS,
536  const BlockFrequency &Freq) const;
537 
538  uint64_t getEntryFreq() const {
539  assert(!Freqs.empty());
540  return Freqs[0].Integer;
541  }
542 };
543 
544 namespace bfi_detail {
545 
546 template <class BlockT> struct TypeMap {};
547 template <> struct TypeMap<BasicBlock> {
551  using LoopT = Loop;
553 };
554 template <> struct TypeMap<MachineBasicBlock> {
560 };
561 
562 /// Get the name of a MachineBasicBlock.
563 ///
564 /// Get the name of a MachineBasicBlock. It's templated so that including from
565 /// CodeGen is unnecessary (that would be a layering issue).
566 ///
567 /// This is used mainly for debug output. The name is similar to
568 /// MachineBasicBlock::getFullName(), but skips the name of the function.
569 template <class BlockT> std::string getBlockName(const BlockT *BB) {
570  assert(BB && "Unexpected nullptr");
571  auto MachineName = "BB" + Twine(BB->getNumber());
572  if (BB->getBasicBlock())
573  return (MachineName + "[" + BB->getName() + "]").str();
574  return MachineName.str();
575 }
576 /// Get the name of a BasicBlock.
577 template <> inline std::string getBlockName(const BasicBlock *BB) {
578  assert(BB && "Unexpected nullptr");
579  return BB->getName().str();
580 }
581 
582 /// Graph of irreducible control flow.
583 ///
584 /// This graph is used for determining the SCCs in a loop (or top-level
585 /// function) that has irreducible control flow.
586 ///
587 /// During the block frequency algorithm, the local graphs are defined in a
588 /// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock
589 /// graphs for most edges, but getting others from \a LoopData::ExitMap. The
590 /// latter only has successor information.
591 ///
592 /// \a IrreducibleGraph makes this graph explicit. It's in a form that can use
593 /// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator),
594 /// and it explicitly lists predecessors and successors. The initialization
595 /// that relies on \c MachineBasicBlock is defined in the header.
598 
600 
602  struct IrrNode {
604  unsigned NumIn = 0;
605  std::deque<const IrrNode *> Edges;
606 
607  IrrNode(const BlockNode &Node) : Node(Node) {}
608 
609  using iterator = std::deque<const IrrNode *>::const_iterator;
610 
611  iterator pred_begin() const { return Edges.begin(); }
612  iterator succ_begin() const { return Edges.begin() + NumIn; }
613  iterator pred_end() const { return succ_begin(); }
614  iterator succ_end() const { return Edges.end(); }
615  };
617  const IrrNode *StartIrr = nullptr;
618  std::vector<IrrNode> Nodes;
620 
621  /// Construct an explicit graph containing irreducible control flow.
622  ///
623  /// Construct an explicit graph of the control flow in \c OuterLoop (or the
624  /// top-level function, if \c OuterLoop is \c nullptr). Uses \c
625  /// addBlockEdges to add block successors that have not been packaged into
626  /// loops.
627  ///
628  /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected
629  /// user of this.
630  template <class BlockEdgesAdder>
632  BlockEdgesAdder addBlockEdges) : BFI(BFI) {
633  initialize(OuterLoop, addBlockEdges);
634  }
635 
636  template <class BlockEdgesAdder>
637  void initialize(const BFIBase::LoopData *OuterLoop,
638  BlockEdgesAdder addBlockEdges);
639  void addNodesInLoop(const BFIBase::LoopData &OuterLoop);
640  void addNodesInFunction();
641 
642  void addNode(const BlockNode &Node) {
643  Nodes.emplace_back(Node);
644  BFI.Working[Node.Index].getMass() = BlockMass::getEmpty();
645  }
646 
647  void indexNodes();
648  template <class BlockEdgesAdder>
649  void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop,
650  BlockEdgesAdder addBlockEdges);
651  void addEdge(IrrNode &Irr, const BlockNode &Succ,
652  const BFIBase::LoopData *OuterLoop);
653 };
654 
655 template <class BlockEdgesAdder>
657  BlockEdgesAdder addBlockEdges) {
658  if (OuterLoop) {
659  addNodesInLoop(*OuterLoop);
660  for (auto N : OuterLoop->Nodes)
661  addEdges(N, OuterLoop, addBlockEdges);
662  } else {
663  addNodesInFunction();
664  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
665  addEdges(Index, OuterLoop, addBlockEdges);
666  }
667  StartIrr = Lookup[Start.Index];
668 }
669 
670 template <class BlockEdgesAdder>
672  const BFIBase::LoopData *OuterLoop,
673  BlockEdgesAdder addBlockEdges) {
674  auto L = Lookup.find(Node.Index);
675  if (L == Lookup.end())
676  return;
677  IrrNode &Irr = *L->second;
678  const auto &Working = BFI.Working[Node.Index];
679 
680  if (Working.isAPackage())
681  for (const auto &I : Working.Loop->Exits)
682  addEdge(Irr, I.first, OuterLoop);
683  else
684  addBlockEdges(*this, Irr, OuterLoop);
685 }
686 
687 } // end namespace bfi_detail
688 
689 /// Shared implementation for block frequency analysis.
690 ///
691 /// This is a shared implementation of BlockFrequencyInfo and
692 /// MachineBlockFrequencyInfo, and calculates the relative frequencies of
693 /// blocks.
694 ///
695 /// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block,
696 /// which is called the header. A given loop, L, can have sub-loops, which are
697 /// loops within the subgraph of L that exclude its header. (A "trivial" SCC
698 /// consists of a single block that does not have a self-edge.)
699 ///
700 /// In addition to loops, this algorithm has limited support for irreducible
701 /// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are
702 /// discovered on they fly, and modelled as loops with multiple headers.
703 ///
704 /// The headers of irreducible sub-SCCs consist of its entry blocks and all
705 /// nodes that are targets of a backedge within it (excluding backedges within
706 /// true sub-loops). Block frequency calculations act as if a block is
707 /// inserted that intercepts all the edges to the headers. All backedges and
708 /// entries point to this block. Its successors are the headers, which split
709 /// the frequency evenly.
710 ///
711 /// This algorithm leverages BlockMass and ScaledNumber to maintain precision,
712 /// separates mass distribution from loop scaling, and dithers to eliminate
713 /// probability mass loss.
714 ///
715 /// The implementation is split between BlockFrequencyInfoImpl, which knows the
716 /// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and
717 /// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a
718 /// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in
719 /// reverse-post order. This gives two advantages: it's easy to compare the
720 /// relative ordering of two nodes, and maps keyed on BlockT can be represented
721 /// by vectors.
722 ///
723 /// This algorithm is O(V+E), unless there is irreducible control flow, in
724 /// which case it's O(V*E) in the worst case.
725 ///
726 /// These are the main stages:
727 ///
728 /// 0. Reverse post-order traversal (\a initializeRPOT()).
729 ///
730 /// Run a single post-order traversal and save it (in reverse) in RPOT.
731 /// All other stages make use of this ordering. Save a lookup from BlockT
732 /// to BlockNode (the index into RPOT) in Nodes.
733 ///
734 /// 1. Loop initialization (\a initializeLoops()).
735 ///
736 /// Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of
737 /// the algorithm. In particular, store the immediate members of each loop
738 /// in reverse post-order.
739 ///
740 /// 2. Calculate mass and scale in loops (\a computeMassInLoops()).
741 ///
742 /// For each loop (bottom-up), distribute mass through the DAG resulting
743 /// from ignoring backedges and treating sub-loops as a single pseudo-node.
744 /// Track the backedge mass distributed to the loop header, and use it to
745 /// calculate the loop scale (number of loop iterations). Immediate
746 /// members that represent sub-loops will already have been visited and
747 /// packaged into a pseudo-node.
748 ///
749 /// Distributing mass in a loop is a reverse-post-order traversal through
750 /// the loop. Start by assigning full mass to the Loop header. For each
751 /// node in the loop:
752 ///
753 /// - Fetch and categorize the weight distribution for its successors.
754 /// If this is a packaged-subloop, the weight distribution is stored
755 /// in \a LoopData::Exits. Otherwise, fetch it from
756 /// BranchProbabilityInfo.
757 ///
758 /// - Each successor is categorized as \a Weight::Local, a local edge
759 /// within the current loop, \a Weight::Backedge, a backedge to the
760 /// loop header, or \a Weight::Exit, any successor outside the loop.
761 /// The weight, the successor, and its category are stored in \a
762 /// Distribution. There can be multiple edges to each successor.
763 ///
764 /// - If there's a backedge to a non-header, there's an irreducible SCC.
765 /// The usual flow is temporarily aborted. \a
766 /// computeIrreducibleMass() finds the irreducible SCCs within the
767 /// loop, packages them up, and restarts the flow.
768 ///
769 /// - Normalize the distribution: scale weights down so that their sum
770 /// is 32-bits, and coalesce multiple edges to the same node.
771 ///
772 /// - Distribute the mass accordingly, dithering to minimize mass loss,
773 /// as described in \a distributeMass().
774 ///
775 /// In the case of irreducible loops, instead of a single loop header,
776 /// there will be several. The computation of backedge masses is similar
777 /// but instead of having a single backedge mass, there will be one
778 /// backedge per loop header. In these cases, each backedge will carry
779 /// a mass proportional to the edge weights along the corresponding
780 /// path.
781 ///
782 /// At the end of propagation, the full mass assigned to the loop will be
783 /// distributed among the loop headers proportionally according to the
784 /// mass flowing through their backedges.
785 ///
786 /// Finally, calculate the loop scale from the accumulated backedge mass.
787 ///
788 /// 3. Distribute mass in the function (\a computeMassInFunction()).
789 ///
790 /// Finally, distribute mass through the DAG resulting from packaging all
791 /// loops in the function. This uses the same algorithm as distributing
792 /// mass in a loop, except that there are no exit or backedge edges.
793 ///
794 /// 4. Unpackage loops (\a unwrapLoops()).
795 ///
796 /// Initialize each block's frequency to a floating point representation of
797 /// its mass.
798 ///
799 /// Visit loops top-down, scaling the frequencies of its immediate members
800 /// by the loop's pseudo-node's frequency.
801 ///
802 /// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()).
803 ///
804 /// Using the min and max frequencies as a guide, translate floating point
805 /// frequencies to an appropriate range in uint64_t.
806 ///
807 /// It has some known flaws.
808 ///
809 /// - The model of irreducible control flow is a rough approximation.
810 ///
811 /// Modelling irreducible control flow exactly involves setting up and
812 /// solving a group of infinite geometric series. Such precision is
813 /// unlikely to be worthwhile, since most of our algorithms give up on
814 /// irreducible control flow anyway.
815 ///
816 /// Nevertheless, we might find that we need to get closer. Here's a sort
817 /// of TODO list for the model with diminishing returns, to be completed as
818 /// necessary.
819 ///
820 /// - The headers for the \a LoopData representing an irreducible SCC
821 /// include non-entry blocks. When these extra blocks exist, they
822 /// indicate a self-contained irreducible sub-SCC. We could treat them
823 /// as sub-loops, rather than arbitrarily shoving the problematic
824 /// blocks into the headers of the main irreducible SCC.
825 ///
826 /// - Entry frequencies are assumed to be evenly split between the
827 /// headers of a given irreducible SCC, which is the only option if we
828 /// need to compute mass in the SCC before its parent loop. Instead,
829 /// we could partially compute mass in the parent loop, and stop when
830 /// we get to the SCC. Here, we have the correct ratio of entry
831 /// masses, which we can use to adjust their relative frequencies.
832 /// Compute mass in the SCC, and then continue propagation in the
833 /// parent.
834 ///
835 /// - We can propagate mass iteratively through the SCC, for some fixed
836 /// number of iterations. Each iteration starts by assigning the entry
837 /// blocks their backedge mass from the prior iteration. The final
838 /// mass for each block (and each exit, and the total backedge mass
839 /// used for computing loop scale) is the sum of all iterations.
840 /// (Running this until fixed point would "solve" the geometric
841 /// series by simulation.)
842 template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
843  // This is part of a workaround for a GCC 4.7 crash on lambdas.
845 
846  using BlockT = typename bfi_detail::TypeMap<BT>::BlockT;
847  using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT;
848  using BranchProbabilityInfoT =
850  using LoopT = typename bfi_detail::TypeMap<BT>::LoopT;
851  using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT;
854 
855  const BranchProbabilityInfoT *BPI = nullptr;
856  const LoopInfoT *LI = nullptr;
857  const FunctionT *F = nullptr;
858 
859  // All blocks in reverse postorder.
860  std::vector<const BlockT *> RPOT;
862 
863  using rpot_iterator = typename std::vector<const BlockT *>::const_iterator;
864 
865  rpot_iterator rpot_begin() const { return RPOT.begin(); }
866  rpot_iterator rpot_end() const { return RPOT.end(); }
867 
868  size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); }
869 
870  BlockNode getNode(const rpot_iterator &I) const {
871  return BlockNode(getIndex(I));
872  }
873  BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); }
874 
875  const BlockT *getBlock(const BlockNode &Node) const {
876  assert(Node.Index < RPOT.size());
877  return RPOT[Node.Index];
878  }
879 
880  /// Run (and save) a post-order traversal.
881  ///
882  /// Saves a reverse post-order traversal of all the nodes in \a F.
883  void initializeRPOT();
884 
885  /// Initialize loop data.
886  ///
887  /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from
888  /// each block to the deepest loop it's in, but we need the inverse. For each
889  /// loop, we store in reverse post-order its "immediate" members, defined as
890  /// the header, the headers of immediate sub-loops, and all other blocks in
891  /// the loop that are not in sub-loops.
892  void initializeLoops();
893 
894  /// Propagate to a block's successors.
895  ///
896  /// In the context of distributing mass through \c OuterLoop, divide the mass
897  /// currently assigned to \c Node between its successors.
898  ///
899  /// \return \c true unless there's an irreducible backedge.
900  bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
901 
902  /// Compute mass in a particular loop.
903  ///
904  /// Assign mass to \c Loop's header, and then for each block in \c Loop in
905  /// reverse post-order, distribute mass to its successors. Only visits nodes
906  /// that have not been packaged into sub-loops.
907  ///
908  /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop.
909  /// \return \c true unless there's an irreducible backedge.
910  bool computeMassInLoop(LoopData &Loop);
911 
912  /// Try to compute mass in the top-level function.
913  ///
914  /// Assign mass to the entry block, and then for each block in reverse
915  /// post-order, distribute mass to its successors. Skips nodes that have
916  /// been packaged into loops.
917  ///
918  /// \pre \a computeMassInLoops() has been called.
919  /// \return \c true unless there's an irreducible backedge.
920  bool tryToComputeMassInFunction();
921 
922  /// Compute mass in (and package up) irreducible SCCs.
923  ///
924  /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front
925  /// of \c Insert), and call \a computeMassInLoop() on each of them.
926  ///
927  /// If \c OuterLoop is \c nullptr, it refers to the top-level function.
928  ///
929  /// \pre \a computeMassInLoop() has been called for each subloop of \c
930  /// OuterLoop.
931  /// \pre \c Insert points at the last loop successfully processed by \a
932  /// computeMassInLoop().
933  /// \pre \c OuterLoop has irreducible SCCs.
934  void computeIrreducibleMass(LoopData *OuterLoop,
935  std::list<LoopData>::iterator Insert);
936 
937  /// Compute mass in all loops.
938  ///
939  /// For each loop bottom-up, call \a computeMassInLoop().
940  ///
941  /// \a computeMassInLoop() aborts (and returns \c false) on loops that
942  /// contain a irreducible sub-SCCs. Use \a computeIrreducibleMass() and then
943  /// re-enter \a computeMassInLoop().
944  ///
945  /// \post \a computeMassInLoop() has returned \c true for every loop.
946  void computeMassInLoops();
947 
948  /// Compute mass in the top-level function.
949  ///
950  /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to
951  /// compute mass in the top-level function.
952  ///
953  /// \post \a tryToComputeMassInFunction() has returned \c true.
954  void computeMassInFunction();
955 
956  std::string getBlockName(const BlockNode &Node) const override {
957  return bfi_detail::getBlockName(getBlock(Node));
958  }
959 
960 public:
961  BlockFrequencyInfoImpl() = default;
962 
963  const FunctionT *getFunction() const { return F; }
964 
965  void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
966  const LoopInfoT &LI);
967 
969 
970  BlockFrequency getBlockFreq(const BlockT *BB) const {
971  return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
972  }
973 
975  const BlockT *BB) const {
977  }
978 
980  uint64_t Freq) const {
982  }
983 
984  bool isIrrLoopHeader(const BlockT *BB) {
986  }
987 
988  void setBlockFreq(const BlockT *BB, uint64_t Freq);
989 
990  Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
992  }
993 
994  const BranchProbabilityInfoT &getBPI() const { return *BPI; }
995 
996  /// Print the frequencies for the current function.
997  ///
998  /// Prints the frequencies for the blocks in the current function.
999  ///
1000  /// Blocks are printed in the natural iteration order of the function, rather
1001  /// than reverse post-order. This provides two advantages: writing -analyze
1002  /// tests is easier (since blocks come out in source order), and even
1003  /// unreachable blocks are printed.
1004  ///
1005  /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so
1006  /// we need to override it here.
1007  raw_ostream &print(raw_ostream &OS) const override;
1008 
1011 
1012  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const {
1013  return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB));
1014  }
1015 };
1016 
1017 template <class BT>
1019  const BranchProbabilityInfoT &BPI,
1020  const LoopInfoT &LI) {
1021  // Save the parameters.
1022  this->BPI = &BPI;
1023  this->LI = &LI;
1024  this->F = &F;
1025 
1026  // Clean up left-over data structures.
1028  RPOT.clear();
1029  Nodes.clear();
1030 
1031  // Initialize.
1032  LLVM_DEBUG(dbgs() << "\nblock-frequency: " << F.getName()
1033  << "\n================="
1034  << std::string(F.getName().size(), '=') << "\n");
1035  initializeRPOT();
1036  initializeLoops();
1037 
1038  // Visit loops in post-order to find the local mass distribution, and then do
1039  // the full function.
1040  computeMassInLoops();
1041  computeMassInFunction();
1042  unwrapLoops();
1043  finalizeMetrics();
1044 }
1045 
1046 template <class BT>
1047 void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) {
1048  if (Nodes.count(BB))
1049  BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq);
1050  else {
1051  // If BB is a newly added block after BFI is done, we need to create a new
1052  // BlockNode for it assigned with a new index. The index can be determined
1053  // by the size of Freqs.
1054  BlockNode NewNode(Freqs.size());
1055  Nodes[BB] = NewNode;
1056  Freqs.emplace_back();
1058  }
1059 }
1060 
1061 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
1062  const BlockT *Entry = &F->front();
1063  RPOT.reserve(F->size());
1064  std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
1065  std::reverse(RPOT.begin(), RPOT.end());
1066 
1067  assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
1068  "More nodes in function than Block Frequency Info supports");
1069 
1070  LLVM_DEBUG(dbgs() << "reverse-post-order-traversal\n");
1071  for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
1072  BlockNode Node = getNode(I);
1073  LLVM_DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node)
1074  << "\n");
1075  Nodes[*I] = Node;
1076  }
1077 
1078  Working.reserve(RPOT.size());
1079  for (size_t Index = 0; Index < RPOT.size(); ++Index)
1080  Working.emplace_back(Index);
1081  Freqs.resize(RPOT.size());
1082 }
1083 
1084 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() {
1085  LLVM_DEBUG(dbgs() << "loop-detection\n");
1086  if (LI->empty())
1087  return;
1088 
1089  // Visit loops top down and assign them an index.
1090  std::deque<std::pair<const LoopT *, LoopData *>> Q;
1091  for (const LoopT *L : *LI)
1092  Q.emplace_back(L, nullptr);
1093  while (!Q.empty()) {
1094  const LoopT *Loop = Q.front().first;
1095  LoopData *Parent = Q.front().second;
1096  Q.pop_front();
1097 
1098  BlockNode Header = getNode(Loop->getHeader());
1099  assert(Header.isValid());
1100 
1101  Loops.emplace_back(Parent, Header);
1102  Working[Header.Index].Loop = &Loops.back();
1103  LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n");
1104 
1105  for (const LoopT *L : *Loop)
1106  Q.emplace_back(L, &Loops.back());
1107  }
1108 
1109  // Visit nodes in reverse post-order and add them to their deepest containing
1110  // loop.
1111  for (size_t Index = 0; Index < RPOT.size(); ++Index) {
1112  // Loop headers have already been mostly mapped.
1113  if (Working[Index].isLoopHeader()) {
1114  LoopData *ContainingLoop = Working[Index].getContainingLoop();
1115  if (ContainingLoop)
1116  ContainingLoop->Nodes.push_back(Index);
1117  continue;
1118  }
1119 
1120  const LoopT *Loop = LI->getLoopFor(RPOT[Index]);
1121  if (!Loop)
1122  continue;
1123 
1124  // Add this node to its containing loop's member list.
1125  BlockNode Header = getNode(Loop->getHeader());
1126  assert(Header.isValid());
1127  const auto &HeaderData = Working[Header.Index];
1128  assert(HeaderData.isLoopHeader());
1129 
1130  Working[Index].Loop = HeaderData.Loop;
1131  HeaderData.Loop->Nodes.push_back(Index);
1132  LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header)
1133  << ": member = " << getBlockName(Index) << "\n");
1134  }
1135 }
1136 
1137 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
1138  // Visit loops with the deepest first, and the top-level loops last.
1139  for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
1140  if (computeMassInLoop(*L))
1141  continue;
1142  auto Next = std::next(L);
1143  computeIrreducibleMass(&*L, L.base());
1144  L = std::prev(Next);
1145  if (computeMassInLoop(*L))
1146  continue;
1147  llvm_unreachable("unhandled irreducible control flow");
1148  }
1149 }
1150 
1151 template <class BT>
1153  // Compute mass in loop.
1154  LLVM_DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");
1155 
1156  if (Loop.isIrreducible()) {
1157  LLVM_DEBUG(dbgs() << "isIrreducible = true\n");
1158  Distribution Dist;
1159  unsigned NumHeadersWithWeight = 0;
1160  Optional<uint64_t> MinHeaderWeight;
1161  DenseSet<uint32_t> HeadersWithoutWeight;
1162  HeadersWithoutWeight.reserve(Loop.NumHeaders);
1163  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1164  auto &HeaderNode = Loop.Nodes[H];
1165  const BlockT *Block = getBlock(HeaderNode);
1166  IsIrrLoopHeader.set(Loop.Nodes[H].Index);
1167  Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
1168  if (!HeaderWeight) {
1169  LLVM_DEBUG(dbgs() << "Missing irr loop header metadata on "
1170  << getBlockName(HeaderNode) << "\n");
1171  HeadersWithoutWeight.insert(H);
1172  continue;
1173  }
1174  LLVM_DEBUG(dbgs() << getBlockName(HeaderNode)
1175  << " has irr loop header weight "
1176  << HeaderWeight.getValue() << "\n");
1177  NumHeadersWithWeight++;
1178  uint64_t HeaderWeightValue = HeaderWeight.getValue();
1179  if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
1180  MinHeaderWeight = HeaderWeightValue;
1181  if (HeaderWeightValue) {
1182  Dist.addLocal(HeaderNode, HeaderWeightValue);
1183  }
1184  }
1185  // As a heuristic, if some headers don't have a weight, give them the
1186  // minimium weight seen (not to disrupt the existing trends too much by
1187  // using a weight that's in the general range of the other headers' weights,
1188  // and the minimum seems to perform better than the average.)
1189  // FIXME: better update in the passes that drop the header weight.
1190  // If no headers have a weight, give them even weight (use weight 1).
1191  if (!MinHeaderWeight)
1192  MinHeaderWeight = 1;
1193  for (uint32_t H : HeadersWithoutWeight) {
1194  auto &HeaderNode = Loop.Nodes[H];
1195  assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
1196  "Shouldn't have a weight metadata");
1197  uint64_t MinWeight = MinHeaderWeight.getValue();
1198  LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to "
1199  << getBlockName(HeaderNode) << "\n");
1200  if (MinWeight)
1201  Dist.addLocal(HeaderNode, MinWeight);
1202  }
1203  distributeIrrLoopHeaderMass(Dist);
1204  for (const BlockNode &M : Loop.Nodes)
1205  if (!propagateMassToSuccessors(&Loop, M))
1206  llvm_unreachable("unhandled irreducible control flow");
1207  if (NumHeadersWithWeight == 0)
1208  // No headers have a metadata. Adjust header mass.
1209  adjustLoopHeaderMass(Loop);
1210  } else {
1211  Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
1212  if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
1213  llvm_unreachable("irreducible control flow to loop header!?");
1214  for (const BlockNode &M : Loop.members())
1215  if (!propagateMassToSuccessors(&Loop, M))
1216  // Irreducible backedge.
1217  return false;
1218  }
1219 
1220  computeLoopScale(Loop);
1221  packageLoop(Loop);
1222  return true;
1223 }
1224 
1225 template <class BT>
1227  // Compute mass in function.
1228  LLVM_DEBUG(dbgs() << "compute-mass-in-function\n");
1229  assert(!Working.empty() && "no blocks in function");
1230  assert(!Working[0].isLoopHeader() && "entry block is a loop header");
1231 
1232  Working[0].getMass() = BlockMass::getFull();
1233  for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) {
1234  // Check for nodes that have been packaged.
1235  BlockNode Node = getNode(I);
1236  if (Working[Node.Index].isPackaged())
1237  continue;
1238 
1239  if (!propagateMassToSuccessors(nullptr, Node))
1240  return false;
1241  }
1242  return true;
1243 }
1244 
1245 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
1246  if (tryToComputeMassInFunction())
1247  return;
1248  computeIrreducibleMass(nullptr, Loops.begin());
1249  if (tryToComputeMassInFunction())
1250  return;
1251  llvm_unreachable("unhandled irreducible control flow");
1252 }
1253 
1254 /// \note This should be a lambda, but that crashes GCC 4.7.
1255 namespace bfi_detail {
1256 
1257 template <class BT> struct BlockEdgesAdder {
1258  using BlockT = BT;
1261 
1263 
1265  : BFI(BFI) {}
1266 
1268  const LoopData *OuterLoop) {
1269  const BlockT *BB = BFI.RPOT[Irr.Node.Index];
1270  for (const auto Succ : children<const BlockT *>(BB))
1271  G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
1272  }
1273 };
1274 
1275 } // end namespace bfi_detail
1276 
1277 template <class BT>
1279  LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
1280  LLVM_DEBUG(dbgs() << "analyze-irreducible-in-";
1281  if (OuterLoop) dbgs()
1282  << "loop: " << getLoopName(*OuterLoop) << "\n";
1283  else dbgs() << "function\n");
1284 
1285  using namespace bfi_detail;
1286 
1287  // Ideally, addBlockEdges() would be declared here as a lambda, but that
1288  // crashes GCC 4.7.
1289  BlockEdgesAdder<BT> addBlockEdges(*this);
1290  IrreducibleGraph G(*this, OuterLoop, addBlockEdges);
1291 
1292  for (auto &L : analyzeIrreducible(G, OuterLoop, Insert))
1293  computeMassInLoop(L);
1294 
1295  if (!OuterLoop)
1296  return;
1297  updateLoopWithIrreducible(*OuterLoop);
1298 }
1299 
1300 // A helper function that converts a branch probability into weight.
1302  return Prob.getNumerator();
1303 }
1304 
1305 template <class BT>
1306 bool
1308  const BlockNode &Node) {
1309  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n");
1310  // Calculate probability for successors.
1311  Distribution Dist;
1312  if (auto *Loop = Working[Node.Index].getPackagedLoop()) {
1313  assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop");
1314  if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist))
1315  // Irreducible backedge.
1316  return false;
1317  } else {
1318  const BlockT *BB = getBlock(Node);
1321  SI != SE; ++SI)
1322  if (!addToDist(
1323  Dist, OuterLoop, Node, getNode(*SI),
1324  getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
1325  // Irreducible backedge.
1326  return false;
1327  }
1328 
1329  // Distribute mass to successors, saving exit and backedge data in the
1330  // loop header.
1331  distributeMass(Node, OuterLoop, Dist);
1332  return true;
1333 }
1334 
1335 template <class BT>
1337  if (!F)
1338  return OS;
1339  OS << "block-frequency-info: " << F->getName() << "\n";
1340  for (const BlockT &BB : *F) {
1341  OS << " - " << bfi_detail::getBlockName(&BB) << ": float = ";
1342  getFloatingBlockFreq(&BB).print(OS, 5)
1343  << ", int = " << getBlockFreq(&BB).getFrequency();
1346  F->getFunction(), getNode(&BB)))
1347  OS << ", count = " << ProfileCount.getValue();
1348  if (Optional<uint64_t> IrrLoopHeaderWeight =
1349  BB.getIrrLoopHeaderWeight())
1350  OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
1351  OS << "\n";
1352  }
1353 
1354  // Add an extra newline for readability.
1355  OS << "\n";
1356  return OS;
1357 }
1358 
1359 // Graph trait base class for block frequency information graph
1360 // viewer.
1361 
1363 
1364 template <class BlockFrequencyInfoT, class BranchProbabilityInfoT>
1367  using NodeRef = typename GTraits::NodeRef;
1368  using EdgeIter = typename GTraits::ChildIteratorType;
1369  using NodeIter = typename GTraits::nodes_iterator;
1370 
1371  uint64_t MaxFrequency = 0;
1372 
1373  explicit BFIDOTGraphTraitsBase(bool isSimple = false)
1375 
1376  static std::string getGraphName(const BlockFrequencyInfoT *G) {
1377  return G->getFunction()->getName();
1378  }
1379 
1380  std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph,
1381  unsigned HotPercentThreshold = 0) {
1382  std::string Result;
1383  if (!HotPercentThreshold)
1384  return Result;
1385 
1386  // Compute MaxFrequency on the fly:
1387  if (!MaxFrequency) {
1388  for (NodeIter I = GTraits::nodes_begin(Graph),
1389  E = GTraits::nodes_end(Graph);
1390  I != E; ++I) {
1391  NodeRef N = *I;
1392  MaxFrequency =
1393  std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency());
1394  }
1395  }
1396  BlockFrequency Freq = Graph->getBlockFreq(Node);
1397  BlockFrequency HotFreq =
1398  (BlockFrequency(MaxFrequency) *
1399  BranchProbability::getBranchProbability(HotPercentThreshold, 100));
1400 
1401  if (Freq < HotFreq)
1402  return Result;
1403 
1404  raw_string_ostream OS(Result);
1405  OS << "color=\"red\"";
1406  OS.flush();
1407  return Result;
1408  }
1409 
1410  std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
1411  GVDAGType GType, int layout_order = -1) {
1412  std::string Result;
1413  raw_string_ostream OS(Result);
1414 
1415  if (layout_order != -1)
1416  OS << Node->getName() << "[" << layout_order << "] : ";
1417  else
1418  OS << Node->getName() << " : ";
1419  switch (GType) {
1420  case GVDT_Fraction:
1421  Graph->printBlockFreq(OS, Node);
1422  break;
1423  case GVDT_Integer:
1424  OS << Graph->getBlockFreq(Node).getFrequency();
1425  break;
1426  case GVDT_Count: {
1427  auto Count = Graph->getBlockProfileCount(Node);
1428  if (Count)
1429  OS << Count.getValue();
1430  else
1431  OS << "Unknown";
1432  break;
1433  }
1434  case GVDT_None:
1435  llvm_unreachable("If we are not supposed to render a graph we should "
1436  "never reach this point.");
1437  }
1438  return Result;
1439  }
1440 
1441  std::string getEdgeAttributes(NodeRef Node, EdgeIter EI,
1442  const BlockFrequencyInfoT *BFI,
1443  const BranchProbabilityInfoT *BPI,
1444  unsigned HotPercentThreshold = 0) {
1445  std::string Str;
1446  if (!BPI)
1447  return Str;
1448 
1449  BranchProbability BP = BPI->getEdgeProbability(Node, EI);
1450  uint32_t N = BP.getNumerator();
1451  uint32_t D = BP.getDenominator();
1452  double Percent = 100.0 * N / D;
1453  raw_string_ostream OS(Str);
1454  OS << format("label=\"%.1f%%\"", Percent);
1455 
1456  if (HotPercentThreshold) {
1457  BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP;
1458  BlockFrequency HotFreq = BlockFrequency(MaxFrequency) *
1459  BranchProbability(HotPercentThreshold, 100);
1460 
1461  if (EFreq >= HotFreq) {
1462  OS << ",color=\"red\"";
1463  }
1464  }
1465 
1466  OS.flush();
1467  return Str;
1468  }
1469 };
1470 
1471 } // end namespace llvm
1472 
1473 #undef DEBUG_TYPE
1474 
1475 #endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
BlockMass operator+(BlockMass L, BlockMass R)
void setBlockFreq(const BlockT *BB, uint64_t Freq)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool IsPackaged
Whether this has been packaged.
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:227
bool operator<=(BlockMass X) const
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:327
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Scaled64 getFloatingBlockFreq(const BlockT *BB) const
bool isIrrLoopHeader(const BlockT *BB)
Various leaf nodes.
Definition: ISDOpcodes.h:59
LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, It2 LastOther)
void push_back(const T &Elt)
Definition: SmallVector.h:217
NodeList::const_iterator members_begin() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:123
static std::string getGraphName(const BlockFrequencyInfoT *G)
BlockFrequency getBlockFreq(const BlockT *BB) const
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockT *BB) const
raw_ostream & print(raw_ostream &OS) const
const BranchProbabilityInfoT & getBPI() const
void addLocal(const BlockNode &Node, uint64_t Amount)
F(f)
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
Construct an explicit graph containing irreducible control flow.
const BlockFrequencyInfoImpl< BT > & BFI
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
BFIDOTGraphTraitsBase(bool isSimple=false)
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, const LoopData *OuterLoop)
Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
NodeList::const_iterator members_end() const
Hexagon Hardware Loops
uint32_t getWeightFromBranchProb(const BranchProbability Prob)
BlockMass & getMass()
Get the appropriate mass for a node.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
bool isAPackage() const
Has Loop been packaged up?
virtual raw_ostream & print(raw_ostream &OS) const
std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph, GVDAGType GType, int layout_order=-1)
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
BlockMass & operator*=(BranchProbability P)
bool operator>=(BlockMass X) const
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
bool operator==(BlockMass X) const
Graph of irreducible control flow.
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:162
std::deque< const IrrNode * >::const_iterator iterator
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1281
void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, const LoopInfoT &LI)
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
raw_ostream & print(raw_ostream &OS) const override
Print the frequencies for the current function.
void addBackedge(const BlockNode &Node, uint64_t Amount)
void initialize(const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
bool isHeader(const BlockNode &Node) const
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
bool isIrrLoopHeader(const BlockNode &Node)
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
typename GraphType::UnknownGraphTypeError NodeRef
Definition: GraphTraits.h:78
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
BlockMass & operator-=(BlockMass X)
Subtract another mass.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
BlockMass & operator+=(BlockMass X)
Add another mass.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:128
#define H(x, y, z)
Definition: MD5.cpp:57
Distribution of unscaled probability weight.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
po_iterator< T > po_end(const T &G)
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
BlockNode getResolvedNode() const
Resolve a node to its representative.
BlockMass Mass
Mass distribution from the entry block.
typename GTraits::NodeRef NodeRef
BlockEdgesAdder(const BlockFrequencyInfoImpl< BT > &BFI)
BlockMass operator*(BlockMass L, BranchProbability R)
Class to represent profile counts.
Definition: Function.h:260
size_t size() const
Definition: SmallVector.h:52
std::string getEdgeAttributes(NodeRef Node, EdgeIter EI, const BlockFrequencyInfoT *BFI, const BranchProbabilityInfoT *BPI, unsigned HotPercentThreshold=0)
raw_ostream & operator<<(raw_ostream &OS, BlockMass X)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void addExit(const BlockNode &Node, uint64_t Amount)
std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph, unsigned HotPercentThreshold=0)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
isPodLike - This is a type trait that is used to determine whether a given type can be copied around ...
Definition: ArrayRef.h:529
static uint32_t getDenominator()
bool isADoublePackage() const
Has Loop been packaged up twice?
ExitMap Exits
Successor edges (and weights).
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
const DataFlowGraph & G
Definition: RDFGraph.cpp:210
BlockMass operator-(BlockMass L, BlockMass R)
uint64_t scale(uint64_t Num) const
Scale a large integer.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringRef > StandardNames)
Initialize the set of available library functions based on the specified target triple.
LoopData(LoopData *Parent, const BlockNode &Header)
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:477
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:132
Analysis providing branch probability information.
bool operator>(BlockMass X) const
void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
iterator begin()
Definition: DenseMap.h:99
BitTracker BT
Definition: BitTracker.cpp:73
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockT *BB) const
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:213
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again...
Definition: DenseSet.h:84
typename GTraits::ChildIteratorType EdgeIter
typename GTraits::nodes_iterator NodeIter
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:170
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:210
iterator_range< NodeList::const_iterator > members() const
bool operator!=(BlockMass X) const
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
bool operator<(BlockMass X) const
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
const FunctionT * getFunction() const
Shared implementation for block frequency analysis.
BlockFrequency getBlockFreq(const BlockNode &Node) const
Base class for BlockFrequencyInfoImpl.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1237
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
LoopData & getLoopPackage(const BlockNode &Head)
uint32_t getNumerator() const
bool isPackaged() const
Has ContainingLoop been packaged up?
po_iterator< T > po_begin(const T &G)
WeightList Weights
Individual successor weights.
void resize(size_type N)
Definition: SmallVector.h:350