LLVM  13.0.0git
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loops should be simplified before this analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/SCCIterator.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/IR/Function.h"
23 #include "llvm/Support/Compiler.h"
24 #include "llvm/Support/Debug.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <list>
34 #include <numeric>
35 #include <utility>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::bfi_detail;
40 
41 #define DEBUG_TYPE "block-freq"
42 
43 namespace llvm {
45  "check-bfi-unknown-block-queries",
46  cl::init(false), cl::Hidden,
47  cl::desc("Check if block frequency is queried for an unknown block "
48  "for debugging missed BFI updates"));
49 
51  "use-iterative-bfi-inference", cl::init(false), cl::Hidden, cl::ZeroOrMore,
52  cl::desc("Apply an iterative post-processing to infer correct BFI counts"));
53 
55  "iterative-bfi-max-iterations-per-block", cl::init(1000), cl::Hidden,
56  cl::desc("Iterative inference: maximum number of update iterations "
57  "per block"));
58 
60  "iterative-bfi-precision", cl::init(1e-12), cl::Hidden,
61  cl::desc("Iterative inference: delta convergence precision; smaller values "
62  "typically lead to better results at the cost of worsen runtime"));
63 }
64 
66  if (isFull())
67  return ScaledNumber<uint64_t>(1, 0);
68  return ScaledNumber<uint64_t>(getMass() + 1, -64);
69 }
70 
71 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
73 #endif
74 
75 static char getHexDigit(int N) {
76  assert(N < 16);
77  if (N < 10)
78  return '0' + N;
79  return 'a' + N - 10;
80 }
81 
83  for (int Digits = 0; Digits < 16; ++Digits)
84  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
85  return OS;
86 }
87 
88 namespace {
89 
91 using Distribution = BlockFrequencyInfoImplBase::Distribution;
96 using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
97 
98 /// Dithering mass distributer.
99 ///
100 /// This class splits up a single mass into portions by weight, dithering to
101 /// spread out error. No mass is lost. The dithering precision depends on the
102 /// precision of the product of \a BlockMass and \a BranchProbability.
103 ///
104 /// The distribution algorithm follows.
105 ///
106 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
107 /// mass to distribute in \a RemMass.
108 ///
109 /// 2. For each portion:
110 ///
111 /// 1. Construct a branch probability, P, as the portion's weight divided
112 /// by the current value of \a RemWeight.
113 /// 2. Calculate the portion's mass as \a RemMass times P.
114 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
115 /// the current portion's weight and mass.
116 struct DitheringDistributer {
117  uint32_t RemWeight;
118  BlockMass RemMass;
119 
120  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
121 
122  BlockMass takeMass(uint32_t Weight);
123 };
124 
125 } // end anonymous namespace
126 
127 DitheringDistributer::DitheringDistributer(Distribution &Dist,
128  const BlockMass &Mass) {
129  Dist.normalize();
130  RemWeight = Dist.Total;
131  RemMass = Mass;
132 }
133 
134 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
135  assert(Weight && "invalid weight");
136  assert(Weight <= RemWeight);
137  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
138 
139  // Decrement totals (dither).
140  RemWeight -= Weight;
141  RemMass -= Mass;
142  return Mass;
143 }
144 
145 void Distribution::add(const BlockNode &Node, uint64_t Amount,
146  Weight::DistType Type) {
147  assert(Amount && "invalid weight of 0");
148  uint64_t NewTotal = Total + Amount;
149 
150  // Check for overflow. It should be impossible to overflow twice.
151  bool IsOverflow = NewTotal < Total;
152  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
153  DidOverflow |= IsOverflow;
154 
155  // Update the total.
156  Total = NewTotal;
157 
158  // Save the weight.
159  Weights.push_back(Weight(Type, Node, Amount));
160 }
161 
162 static void combineWeight(Weight &W, const Weight &OtherW) {
163  assert(OtherW.TargetNode.isValid());
164  if (!W.Amount) {
165  W = OtherW;
166  return;
167  }
168  assert(W.Type == OtherW.Type);
169  assert(W.TargetNode == OtherW.TargetNode);
170  assert(OtherW.Amount && "Expected non-zero weight");
171  if (W.Amount > W.Amount + OtherW.Amount)
172  // Saturate on overflow.
173  W.Amount = UINT64_MAX;
174  else
175  W.Amount += OtherW.Amount;
176 }
177 
178 static void combineWeightsBySorting(WeightList &Weights) {
179  // Sort so edges to the same node are adjacent.
180  llvm::sort(Weights, [](const Weight &L, const Weight &R) {
181  return L.TargetNode < R.TargetNode;
182  });
183 
184  // Combine adjacent edges.
185  WeightList::iterator O = Weights.begin();
186  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
187  ++O, (I = L)) {
188  *O = *I;
189 
190  // Find the adjacent weights to the same node.
191  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
192  combineWeight(*O, *L);
193  }
194 
195  // Erase extra entries.
196  Weights.erase(O, Weights.end());
197 }
198 
199 static void combineWeightsByHashing(WeightList &Weights) {
200  // Collect weights into a DenseMap.
201  using HashTable = DenseMap<BlockNode::IndexType, Weight>;
202 
203  HashTable Combined(NextPowerOf2(2 * Weights.size()));
204  for (const Weight &W : Weights)
205  combineWeight(Combined[W.TargetNode.Index], W);
206 
207  // Check whether anything changed.
208  if (Weights.size() == Combined.size())
209  return;
210 
211  // Fill in the new weights.
212  Weights.clear();
213  Weights.reserve(Combined.size());
214  for (const auto &I : Combined)
215  Weights.push_back(I.second);
216 }
217 
218 static void combineWeights(WeightList &Weights) {
219  // Use a hash table for many successors to keep this linear.
220  if (Weights.size() > 128) {
221  combineWeightsByHashing(Weights);
222  return;
223  }
224 
225  combineWeightsBySorting(Weights);
226 }
227 
228 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
229  assert(Shift >= 0);
230  assert(Shift < 64);
231  if (!Shift)
232  return N;
233  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
234 }
235 
236 void Distribution::normalize() {
237  // Early exit for termination nodes.
238  if (Weights.empty())
239  return;
240 
241  // Only bother if there are multiple successors.
242  if (Weights.size() > 1)
243  combineWeights(Weights);
244 
245  // Early exit when combined into a single successor.
246  if (Weights.size() == 1) {
247  Total = 1;
248  Weights.front().Amount = 1;
249  return;
250  }
251 
252  // Determine how much to shift right so that the total fits into 32-bits.
253  //
254  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
255  // for each weight can cause a 32-bit overflow.
256  int Shift = 0;
257  if (DidOverflow)
258  Shift = 33;
259  else if (Total > UINT32_MAX)
260  Shift = 33 - countLeadingZeros(Total);
261 
262  // Early exit if nothing needs to be scaled.
263  if (!Shift) {
264  // If we didn't overflow then combineWeights() shouldn't have changed the
265  // sum of the weights, but let's double-check.
266  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
267  [](uint64_t Sum, const Weight &W) {
268  return Sum + W.Amount;
269  }) &&
270  "Expected total to be correct");
271  return;
272  }
273 
274  // Recompute the total through accumulation (rather than shifting it) so that
275  // it's accurate after shifting and any changes combineWeights() made above.
276  Total = 0;
277 
278  // Sum the weights to each node and shift right if necessary.
279  for (Weight &W : Weights) {
280  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
281  // can round here without concern about overflow.
282  assert(W.TargetNode.isValid());
283  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
284  assert(W.Amount <= UINT32_MAX);
285 
286  // Update the total.
287  Total += W.Amount;
288  }
289  assert(Total <= UINT32_MAX);
290 }
291 
293  // Swap with a default-constructed std::vector, since std::vector<>::clear()
294  // does not actually clear heap storage.
295  std::vector<FrequencyData>().swap(Freqs);
296  IsIrrLoopHeader.clear();
297  std::vector<WorkingData>().swap(Working);
298  Loops.clear();
299 }
300 
301 /// Clear all memory not needed downstream.
302 ///
303 /// Releases all memory not used downstream. In particular, saves Freqs.
305  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
306  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
307  BFI.clear();
308  BFI.Freqs = std::move(SavedFreqs);
309  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
310 }
311 
313  const LoopData *OuterLoop,
314  const BlockNode &Pred,
315  const BlockNode &Succ,
316  uint64_t Weight) {
317  if (!Weight)
318  Weight = 1;
319 
320  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
321  return OuterLoop && OuterLoop->isHeader(Node);
322  };
323 
324  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
325 
326 #ifndef NDEBUG
327  auto debugSuccessor = [&](const char *Type) {
328  dbgs() << " =>"
329  << " [" << Type << "] weight = " << Weight;
330  if (!isLoopHeader(Resolved))
331  dbgs() << ", succ = " << getBlockName(Succ);
332  if (Resolved != Succ)
333  dbgs() << ", resolved = " << getBlockName(Resolved);
334  dbgs() << "\n";
335  };
336  (void)debugSuccessor;
337 #endif
338 
339  if (isLoopHeader(Resolved)) {
340  LLVM_DEBUG(debugSuccessor("backedge"));
341  Dist.addBackedge(Resolved, Weight);
342  return true;
343  }
344 
345  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
346  LLVM_DEBUG(debugSuccessor(" exit "));
347  Dist.addExit(Resolved, Weight);
348  return true;
349  }
350 
351  if (Resolved < Pred) {
352  if (!isLoopHeader(Pred)) {
353  // If OuterLoop is an irreducible loop, we can't actually handle this.
354  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
355  "unhandled irreducible control flow");
356 
357  // Irreducible backedge. Abort.
358  LLVM_DEBUG(debugSuccessor("abort!!!"));
359  return false;
360  }
361 
362  // If "Pred" is a loop header, then this isn't really a backedge; rather,
363  // OuterLoop must be irreducible. These false backedges can come only from
364  // secondary loop headers.
365  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
366  "unhandled irreducible control flow");
367  }
368 
369  LLVM_DEBUG(debugSuccessor(" local "));
370  Dist.addLocal(Resolved, Weight);
371  return true;
372 }
373 
375  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
376  // Copy the exit map into Dist.
377  for (const auto &I : Loop.Exits)
378  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
379  I.second.getMass()))
380  // Irreducible backedge.
381  return false;
382 
383  return true;
384 }
385 
386 /// Compute the loop scale for a loop.
388  // Compute loop scale.
389  LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
390 
391  // Infinite loops need special handling. If we give the back edge an infinite
392  // mass, they may saturate all the other scales in the function down to 1,
393  // making all the other region temperatures look exactly the same. Choose an
394  // arbitrary scale to avoid these issues.
395  //
396  // FIXME: An alternate way would be to select a symbolic scale which is later
397  // replaced to be the maximum of all computed scales plus 1. This would
398  // appropriately describe the loop as having a large scale, without skewing
399  // the final frequency computation.
400  const Scaled64 InfiniteLoopScale(1, 12);
401 
402  // LoopScale == 1 / ExitMass
403  // ExitMass == HeadMass - BackedgeMass
404  BlockMass TotalBackedgeMass;
405  for (auto &Mass : Loop.BackedgeMass)
406  TotalBackedgeMass += Mass;
407  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
408 
409  // Block scale stores the inverse of the scale. If this is an infinite loop,
410  // its exit mass will be zero. In this case, use an arbitrary scale for the
411  // loop scale.
412  Loop.Scale =
413  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
414 
415  LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
416  << BlockMass::getFull() << " - " << TotalBackedgeMass
417  << ")\n"
418  << " - scale = " << Loop.Scale << "\n");
419 }
420 
421 /// Package up a loop.
423  LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
424 
425  // Clear the subloop exits to prevent quadratic memory usage.
426  for (const BlockNode &M : Loop.Nodes) {
427  if (auto *Loop = Working[M.Index].getPackagedLoop())
428  Loop->Exits.clear();
429  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
430  }
431  Loop.IsPackaged = true;
432 }
433 
434 #ifndef NDEBUG
436  const DitheringDistributer &D, const BlockNode &T,
437  const BlockMass &M, const char *Desc) {
438  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
439  if (Desc)
440  dbgs() << " [" << Desc << "]";
441  if (T.isValid())
442  dbgs() << " to " << BFI.getBlockName(T);
443  dbgs() << "\n";
444 }
445 #endif
446 
448  LoopData *OuterLoop,
449  Distribution &Dist) {
450  BlockMass Mass = Working[Source.Index].getMass();
451  LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
452 
453  // Distribute mass to successors as laid out in Dist.
454  DitheringDistributer D(Dist, Mass);
455 
456  for (const Weight &W : Dist.Weights) {
457  // Check for a local edge (non-backedge and non-exit).
458  BlockMass Taken = D.takeMass(W.Amount);
459  if (W.Type == Weight::Local) {
460  Working[W.TargetNode.Index].getMass() += Taken;
461  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
462  continue;
463  }
464 
465  // Backedges and exits only make sense if we're processing a loop.
466  assert(OuterLoop && "backedge or exit outside of loop");
467 
468  // Check for a backedge.
469  if (W.Type == Weight::Backedge) {
470  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
471  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
472  continue;
473  }
474 
475  // This must be an exit.
476  assert(W.Type == Weight::Exit);
477  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
478  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
479  }
480 }
481 
483  const Scaled64 &Min, const Scaled64 &Max) {
484  // Scale the Factor to a size that creates integers. Ideally, integers would
485  // be scaled so that Max == UINT64_MAX so that they can be best
486  // differentiated. However, in the presence of large frequency values, small
487  // frequencies are scaled down to 1, making it impossible to differentiate
488  // small, unequal numbers. When the spread between Min and Max frequencies
489  // fits well within MaxBits, we make the scale be at least 8.
490  const unsigned MaxBits = 64;
491  const unsigned SpreadBits = (Max / Min).lg();
492  Scaled64 ScalingFactor;
493  if (SpreadBits <= MaxBits - 3) {
494  // If the values are small enough, make the scaling factor at least 8 to
495  // allow distinguishing small values.
496  ScalingFactor = Min.inverse();
497  ScalingFactor <<= 3;
498  } else {
499  // If the values need more than MaxBits to be represented, saturate small
500  // frequency values down to 1 by using a scaling factor that benefits large
501  // frequency values.
502  ScalingFactor = Scaled64(1, MaxBits) / Max;
503  }
504 
505  // Translate the floats to integers.
506  LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
507  << ", factor = " << ScalingFactor << "\n");
508  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
509  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
510  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
511  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
512  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
513  << ", int = " << BFI.Freqs[Index].Integer << "\n");
514  }
515 }
516 
517 /// Unwrap a loop package.
518 ///
519 /// Visits all the members of a loop, adjusting their BlockData according to
520 /// the loop's pseudo-node.
521 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
522  LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
523  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
524  << "\n");
525  Loop.Scale *= Loop.Mass.toScaled();
526  Loop.IsPackaged = false;
527  LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
528 
529  // Propagate the head scale through the loop. Since members are visited in
530  // RPO, the head scale will be updated by the loop scale first, and then the
531  // final head scale will be used for updated the rest of the members.
532  for (const BlockNode &N : Loop.Nodes) {
533  const auto &Working = BFI.Working[N.Index];
534  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
535  : BFI.Freqs[N.Index].Scaled;
536  Scaled64 New = Loop.Scale * F;
537  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
538  << New << "\n");
539  F = New;
540  }
541 }
542 
544  // Set initial frequencies from loop-local masses.
545  for (size_t Index = 0; Index < Working.size(); ++Index)
546  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
547 
548  for (LoopData &Loop : Loops)
549  unwrapLoop(*this, Loop);
550 }
551 
553  // Unwrap loop packages in reverse post-order, tracking min and max
554  // frequencies.
555  auto Min = Scaled64::getLargest();
556  auto Max = Scaled64::getZero();
557  for (size_t Index = 0; Index < Working.size(); ++Index) {
558  // Update min/max scale.
559  Min = std::min(Min, Freqs[Index].Scaled);
560  Max = std::max(Max, Freqs[Index].Scaled);
561  }
562 
563  // Convert to integers.
564  convertFloatingToInteger(*this, Min, Max);
565 
566  // Clean up data structures.
567  cleanup(*this);
568 
569  // Print out the final stats.
570  LLVM_DEBUG(dump());
571 }
572 
575  if (!Node.isValid()) {
576 #ifndef NDEBUG
578  SmallString<256> Msg;
579  raw_svector_ostream OS(Msg);
580  OS << "*** Detected BFI query for unknown block " << getBlockName(Node);
581  report_fatal_error(OS.str());
582  }
583 #endif
584  return 0;
585  }
586  return Freqs[Node.Index].Integer;
587 }
588 
591  const BlockNode &Node,
592  bool AllowSynthetic) const {
593  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
594  AllowSynthetic);
595 }
596 
599  uint64_t Freq,
600  bool AllowSynthetic) const {
601  auto EntryCount = F.getEntryCount(AllowSynthetic);
602  if (!EntryCount)
603  return None;
604  // Use 128 bit APInt to do the arithmetic to avoid overflow.
605  APInt BlockCount(128, EntryCount.getCount());
606  APInt BlockFreq(128, Freq);
607  APInt EntryFreq(128, getEntryFreq());
608  BlockCount *= BlockFreq;
609  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
610  // lshr by 1 gives EntryFreq/2.
611  BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
612  return BlockCount.getLimitedValue();
613 }
614 
615 bool
617  if (!Node.isValid())
618  return false;
619  return IsIrrLoopHeader.test(Node.Index);
620 }
621 
622 Scaled64
624  if (!Node.isValid())
625  return Scaled64::getZero();
626  return Freqs[Node.Index].Scaled;
627 }
628 
630  uint64_t Freq) {
631  assert(Node.isValid() && "Expected valid node");
632  assert(Node.Index < Freqs.size() && "Expected legal index");
633  Freqs[Node.Index].Integer = Freq;
634 }
635 
636 std::string
638  return {};
639 }
640 
641 std::string
643  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
644 }
645 
646 raw_ostream &
648  const BlockNode &Node) const {
649  return OS << getFloatingBlockFreq(Node);
650 }
651 
652 raw_ostream &
654  const BlockFrequency &Freq) const {
655  Scaled64 Block(Freq.getFrequency(), 0);
656  Scaled64 Entry(getEntryFreq(), 0);
657 
658  return OS << Block / Entry;
659 }
660 
662  Start = OuterLoop.getHeader();
663  Nodes.reserve(OuterLoop.Nodes.size());
664  for (auto N : OuterLoop.Nodes)
665  addNode(N);
666  indexNodes();
667 }
668 
670  Start = 0;
671  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
672  if (!BFI.Working[Index].isPackaged())
673  addNode(Index);
674  indexNodes();
675 }
676 
678  for (auto &I : Nodes)
679  Lookup[I.Node.Index] = &I;
680 }
681 
683  const BFIBase::LoopData *OuterLoop) {
684  if (OuterLoop && OuterLoop->isHeader(Succ))
685  return;
686  auto L = Lookup.find(Succ.Index);
687  if (L == Lookup.end())
688  return;
689  IrrNode &SuccIrr = *L->second;
690  Irr.Edges.push_back(&SuccIrr);
691  SuccIrr.Edges.push_front(&Irr);
692  ++SuccIrr.NumIn;
693 }
694 
695 namespace llvm {
696 
697 template <> struct GraphTraits<IrreducibleGraph> {
699  using NodeRef = const GraphT::IrrNode *;
700  using ChildIteratorType = GraphT::IrrNode::iterator;
701 
702  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
703  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
704  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
705 };
706 
707 } // end namespace llvm
708 
709 /// Find extra irreducible headers.
710 ///
711 /// Find entry blocks and other blocks with backedges, which exist when \c G
712 /// contains irreducible sub-SCCs.
715  const IrreducibleGraph &G,
716  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
717  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
718  // Map from nodes in the SCC to whether it's an entry block.
720 
721  // InSCC also acts the set of nodes in the graph. Seed it.
722  for (const auto *I : SCC)
723  InSCC[I] = false;
724 
725  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
726  auto &Irr = *I->first;
727  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
728  if (InSCC.count(P))
729  continue;
730 
731  // This is an entry block.
732  I->second = true;
733  Headers.push_back(Irr.Node);
734  LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
735  << "\n");
736  break;
737  }
738  }
739  assert(Headers.size() >= 2 &&
740  "Expected irreducible CFG; -loop-info is likely invalid");
741  if (Headers.size() == InSCC.size()) {
742  // Every block is a header.
743  llvm::sort(Headers);
744  return;
745  }
746 
747  // Look for extra headers from irreducible sub-SCCs.
748  for (const auto &I : InSCC) {
749  // Entry blocks are already headers.
750  if (I.second)
751  continue;
752 
753  auto &Irr = *I.first;
754  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
755  // Skip forward edges.
756  if (P->Node < Irr.Node)
757  continue;
758 
759  // Skip predecessors from entry blocks. These can have inverted
760  // ordering.
761  if (InSCC.lookup(P))
762  continue;
763 
764  // Store the extra header.
765  Headers.push_back(Irr.Node);
766  LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
767  << "\n");
768  break;
769  }
770  if (Headers.back() == Irr.Node)
771  // Added this as a header.
772  continue;
773 
774  // This is not a header.
775  Others.push_back(Irr.Node);
776  LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
777  }
778  llvm::sort(Headers);
779  llvm::sort(Others);
780 }
781 
784  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
785  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
786  // Translate the SCC into RPO.
787  LLVM_DEBUG(dbgs() << " - found-scc\n");
788 
789  LoopData::NodeList Headers;
790  LoopData::NodeList Others;
791  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
792 
793  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
794  Headers.end(), Others.begin(), Others.end());
795 
796  // Update loop hierarchy.
797  for (const auto &N : Loop->Nodes)
798  if (BFI.Working[N.Index].isLoopHeader())
799  BFI.Working[N.Index].Loop->Parent = &*Loop;
800  else
801  BFI.Working[N.Index].Loop = &*Loop;
802 }
803 
806  const IrreducibleGraph &G, LoopData *OuterLoop,
807  std::list<LoopData>::iterator Insert) {
808  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
809  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
810 
811  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
812  if (I->size() < 2)
813  continue;
814 
815  // Translate the SCC into RPO.
816  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
817  }
818 
819  if (OuterLoop)
820  return make_range(std::next(Prev), Insert);
821  return make_range(Loops.begin(), Insert);
822 }
823 
824 void
826  OuterLoop.Exits.clear();
827  for (auto &Mass : OuterLoop.BackedgeMass)
828  Mass = BlockMass::getEmpty();
829  auto O = OuterLoop.Nodes.begin() + 1;
830  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
831  if (!Working[I->Index].isPackaged())
832  *O++ = *I;
833  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
834 }
835 
837  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
838 
839  // Since the loop has more than one header block, the mass flowing back into
840  // each header will be different. Adjust the mass in each header loop to
841  // reflect the masses flowing through back edges.
842  //
843  // To do this, we distribute the initial mass using the backedge masses
844  // as weights for the distribution.
845  BlockMass LoopMass = BlockMass::getFull();
846  Distribution Dist;
847 
848  LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
849  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
850  auto &HeaderNode = Loop.Nodes[H];
851  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
852  LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
853  << getBlockName(HeaderNode) << ": " << BackedgeMass
854  << "\n");
855  if (BackedgeMass.getMass() > 0)
856  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
857  else
858  LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
859  }
860 
861  DitheringDistributer D(Dist, LoopMass);
862 
863  LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
864  << " to headers using above weights\n");
865  for (const Weight &W : Dist.Weights) {
866  BlockMass Taken = D.takeMass(W.Amount);
867  assert(W.Type == Weight::Local && "all weights should be local");
868  Working[W.TargetNode.Index].getMass() = Taken;
869  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
870  }
871 }
872 
874  BlockMass LoopMass = BlockMass::getFull();
875  DitheringDistributer D(Dist, LoopMass);
876  for (const Weight &W : Dist.Weights) {
877  BlockMass Taken = D.takeMass(W.Amount);
878  assert(W.Type == Weight::Local && "all weights should be local");
879  Working[W.TargetNode.Index].getMass() = Taken;
880  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
881  }
882 }
llvm::NextPowerOf2
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:683
llvm::BlockFrequencyInfoImplBase::getLoopName
std::string getLoopName(const LoopData &Loop) const
Definition: BlockFrequencyInfoImpl.cpp:642
combineWeightsByHashing
static void combineWeightsByHashing(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:199
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
MathExtras.h
llvm::bfi_detail::BlockMass
Mass of a block.
Definition: BlockFrequencyInfoImpl.h:91
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
llvm::BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass
void distributeIrrLoopHeaderMass(Distribution &Dist)
Definition: BlockFrequencyInfoImpl.cpp:873
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::BlockFrequencyInfoImplBase::computeLoopScale
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
Definition: BlockFrequencyInfoImpl.cpp:387
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
SCCIterator.h
unwrapLoop
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
Definition: BlockFrequencyInfoImpl.cpp:521
llvm::bfi_detail::IrreducibleGraph::addNodesInLoop
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:661
llvm::bfi_detail::IrreducibleGraph
Graph of irreducible control flow.
Definition: BlockFrequencyInfoImpl.h:602
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
combineWeights
static void combineWeights(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:218
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:181
llvm::GraphTraits< IrreducibleGraph >::getEntryNode
static NodeRef getEntryNode(const GraphT &G)
Definition: BlockFrequencyInfoImpl.cpp:702
llvm::BlockFrequencyInfoImplBase::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:590
llvm::BlockFrequencyInfoImplBase::FrequencyData
Stats about a block itself.
Definition: BlockFrequencyInfoImpl.h:214
llvm::GraphTraits< IrreducibleGraph >::child_end
static ChildIteratorType child_end(NodeRef N)
Definition: BlockFrequencyInfoImpl.cpp:704
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::ScaledNumber::inverse
ScaledNumber inverse() const
Definition: ScaledNumber.h:677
cleanup
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
Definition: BlockFrequencyInfoImpl.cpp:304
APInt.h
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:74
Shift
bool Shift
Definition: README.txt:468
BlockFrequency.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::bfi_detail::IrreducibleGraph::addNode
void addNode(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.h:648
llvm::rdf::NodeList
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:512
llvm::Optional< uint64_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::bfi_detail::BlockMass::isFull
bool isFull() const
Definition: BlockFrequencyInfoImpl.h:106
llvm::BlockFrequencyInfoImplBase::Distribution::addLocal
void addLocal(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:393
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::BlockFrequencyInfoImplBase::clear
void clear()
Clear all memory.
Definition: BlockFrequencyInfoImpl.cpp:292
llvm::APInt::lshr
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:987
llvm::BlockFrequencyInfoImplBase::LoopData::getHeader
BlockNode getHeader() const
Definition: BlockFrequencyInfoImpl.h:256
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::BlockFrequencyInfoImplBase::packageLoop
void packageLoop(LoopData &Loop)
Package up a loop.
Definition: BlockFrequencyInfoImpl.cpp:422
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GraphTraits< IrreducibleGraph >::ChildIteratorType
GraphT::IrrNode::iterator ChildIteratorType
Definition: BlockFrequencyInfoImpl.cpp:700
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GraphTraits.h
llvm::BlockFrequencyInfoImplBase::updateLoopWithIrreducible
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
Definition: BlockFrequencyInfoImpl.cpp:825
llvm::SparseBitVector
Definition: SparseBitVector.h:255
BlockFrequencyInfoImpl.h
llvm::BlockFrequencyInfoImplBase::finalizeMetrics
void finalizeMetrics()
Finalize frequency metrics.
Definition: BlockFrequencyInfoImpl.cpp:552
llvm::bfi_detail::BlockMass::dump
void dump() const
Definition: BlockFrequencyInfoImpl.cpp:72
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::bfi_detail::IrreducibleGraph::addEdge
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:682
llvm::BlockFrequencyInfoImplBase::getProfileCountFromFreq
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:598
llvm::bfi_detail::IrreducibleGraph::Start
BlockNode Start
Definition: BlockFrequencyInfoImpl.h:622
llvm::bfi_detail::BlockMass::toScaled
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
Definition: BlockFrequencyInfoImpl.cpp:65
llvm::BlockFrequencyInfoImplBase::getBlockFreq
BlockFrequency getBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:574
UINT64_MAX
#define UINT64_MAX
Definition: DataTypes.h:77
llvm::APInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:487
llvm::BlockFrequencyInfoImplBase::LoopData::isHeader
bool isHeader(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.h:249
llvm::bfi_detail::IrreducibleGraph::BFI
BFIBase & BFI
Definition: BlockFrequencyInfoImpl.h:605
llvm::BlockFrequencyInfoImplBase::distributeMass
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
Definition: BlockFrequencyInfoImpl.cpp:447
llvm::bfi_detail::BlockMass::getFull
static BlockMass getFull()
Definition: BlockFrequencyInfoImpl.h:100
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::bfi_detail
Definition: BlockFrequencyInfoImpl.h:70
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
createIrreducibleLoop
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
Definition: BlockFrequencyInfoImpl.cpp:782
combineWeightsBySorting
static void combineWeightsBySorting(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:178
llvm::None
const NoneType None
Definition: None.h:23
llvm::BlockFrequencyInfoImplBase::Distribution
Distribution of unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:384
BranchProbability.h
llvm::SmallString< 256 >
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:228
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::cl::opt< bool >
llvm::BlockFrequencyInfoImplBase::printBlockFreq
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:647
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:179
llvm::BlockFrequencyInfoImplBase::LoopData::isIrreducible
bool isIrreducible() const
Definition: BlockFrequencyInfoImpl.h:257
llvm::BlockFrequencyInfoImplBase::Distribution::WeightList
SmallVector< Weight, 4 > WeightList
Definition: BlockFrequencyInfoImpl.h:385
llvm::BlockFrequency
Definition: BlockFrequency.h:24
llvm::BlockFrequencyInfoImplBase::Distribution::addBackedge
void addBackedge(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:401
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
combineWeight
static void combineWeight(Weight &W, const Weight &OtherW)
Definition: BlockFrequencyInfoImpl.cpp:162
Scaled
@ Scaled
Definition: ARCInstrInfo.cpp:35
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition: BlockFrequency.h:35
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::bfi_detail::IrreducibleGraph::IrrNode
Definition: BlockFrequencyInfoImpl.h:608
llvm::IterativeBFIPrecision
llvm::cl::opt< double > IterativeBFIPrecision
llvm::BlockFrequencyInfoImplBase::Working
std::vector< WorkingData > Working
Loop data: see initializeLoops().
Definition: BlockFrequencyInfoImpl.h:428
llvm::GraphTraits< IrreducibleGraph >::child_begin
static ChildIteratorType child_begin(NodeRef N)
Definition: BlockFrequencyInfoImpl.cpp:703
debugAssign
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
Definition: BlockFrequencyInfoImpl.cpp:435
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::BlockFrequencyInfoImplBase::addLoopSuccessorsToDist
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:374
llvm::bfi_detail::IrreducibleGraph::addNodesInFunction
void addNodesInFunction()
Definition: BlockFrequencyInfoImpl.cpp:669
llvm::BlockFrequencyInfoImplBase::unwrapLoops
void unwrapLoops()
Unwrap loops.
Definition: BlockFrequencyInfoImpl.cpp:543
llvm::bfi_detail::BlockMass::getEmpty
static BlockMass getEmpty()
Definition: BlockFrequencyInfoImpl.h:98
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:285
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::CheckBFIUnknownBlockQueries
llvm::cl::opt< bool > CheckBFIUnknownBlockQueries
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
findIrreducibleHeaders
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Definition: BlockFrequencyInfoImpl.cpp:713
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:681
llvm::BlockFrequencyInfoImplBase::BlockNode::Index
IndexType Index
Definition: BlockFrequencyInfoImpl.h:194
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::bfi_detail::BlockMass::isEmpty
bool isEmpty() const
Definition: BlockFrequencyInfoImpl.h:107
Compiler.h
llvm::BlockFrequencyInfoImplBase
Base class for BlockFrequencyInfoImpl.
Definition: BlockFrequencyInfoImpl.h:179
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::BlockFrequencyInfoImplBase::LoopData::Exits
ExitMap Exits
Successor edges (and weights).
Definition: BlockFrequencyInfoImpl.h:231
ScaledNumber.h
llvm::BlockFrequencyInfoImplBase::Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: BlockFrequencyInfoImpl.h:181
llvm::ScaledNumber< uint64_t >
llvm::bfi_detail::IrreducibleGraph::IrrNode::NumIn
unsigned NumIn
Definition: BlockFrequencyInfoImpl.h:610
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::BlockFrequencyInfoImplBase::adjustLoopHeaderMass
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
Definition: BlockFrequencyInfoImpl.cpp:836
llvm::BlockFrequencyInfoImplBase::getFloatingBlockFreq
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:623
Function.h
llvm::BlockFrequencyInfoImplBase::Distribution::Weights
WeightList Weights
Individual successor weights.
Definition: BlockFrequencyInfoImpl.h:387
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:100
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1475
llvm::BlockFrequencyInfoImplBase::setBlockFreq
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
Definition: BlockFrequencyInfoImpl.cpp:629
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:42
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::BlockFrequencyInfoImplBase::Weight
Unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:365
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:225
llvm::BlockFrequencyInfoImplBase::LoopData::Nodes
NodeList Nodes
Header and the members of the loop.
Definition: BlockFrequencyInfoImpl.h:232
convertFloatingToInteger
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
Definition: BlockFrequencyInfoImpl.cpp:482
llvm::bfi_detail::BlockMass::print
raw_ostream & print(raw_ostream &OS) const
Definition: BlockFrequencyInfoImpl.cpp:82
shiftRightAndRound
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
Definition: BlockFrequencyInfoImpl.cpp:228
llvm::bfi_detail::BlockMass::getMass
uint64_t getMass() const
Definition: BlockFrequencyInfoImpl.h:104
llvm::BlockFrequencyInfoImplBase::LoopData::BackedgeMass
HeaderMassList BackedgeMass
Mass returned to each loop header.
Definition: BlockFrequencyInfoImpl.h:233
llvm::IterativeBFIMaxIterationsPerBlock
llvm::cl::opt< unsigned > IterativeBFIMaxIterationsPerBlock
N
#define N
llvm::bfi_detail::getBlockName
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Definition: BlockFrequencyInfoImpl.h:575
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:656
llvm::BlockFrequencyInfoImplBase::getBlockName
virtual std::string getBlockName(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:637
llvm::BlockFrequencyInfoImplBase::LoopData::getHeaderIndex
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
Definition: BlockFrequencyInfoImpl.h:259
llvm::BlockFrequencyInfoImplBase::isIrrLoopHeader
bool isIrrLoopHeader(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.cpp:616
llvm::GraphTraits
Definition: GraphTraits.h:35
llvm::BlockFrequencyInfoImplBase::LoopData
Data about a loop.
Definition: BlockFrequencyInfoImpl.h:223
llvm::bfi_detail::IrreducibleGraph::indexNodes
void indexNodes()
Definition: BlockFrequencyInfoImpl.cpp:677
llvm::bfi_detail::IrreducibleGraph::Nodes
std::vector< IrrNode > Nodes
Definition: BlockFrequencyInfoImpl.h:624
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::bfi_detail::IrreducibleGraph::Lookup
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
Definition: BlockFrequencyInfoImpl.h:625
llvm::BlockFrequencyInfoImplBase::Distribution::addExit
void addExit(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:397
llvm::BlockFrequencyInfoImplBase::BlockNode
Representative of a block.
Definition: BlockFrequencyInfoImpl.h:191
Debug.h
llvm::bfi_detail::IrreducibleGraph::IrrNode::Edges
std::deque< const IrrNode * > Edges
Definition: BlockFrequencyInfoImpl.h:611
llvm::BlockFrequencyInfoImplBase::addToDist
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:312
llvm::BlockFrequencyInfoImplBase::analyzeIrreducible
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
Definition: BlockFrequencyInfoImpl.cpp:805
getHexDigit
static char getHexDigit(int N)
Definition: BlockFrequencyInfoImpl.cpp:75