LLVM  16.0.0git
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loops should be simplified before this analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/SCCIterator.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/IR/Function.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/Support/Debug.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <list>
33 #include <numeric>
34 #include <utility>
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::bfi_detail;
39 
40 #define DEBUG_TYPE "block-freq"
41 
42 namespace llvm {
44  "check-bfi-unknown-block-queries",
45  cl::init(false), cl::Hidden,
46  cl::desc("Check if block frequency is queried for an unknown block "
47  "for debugging missed BFI updates"));
48 
50  "use-iterative-bfi-inference", cl::Hidden,
51  cl::desc("Apply an iterative post-processing to infer correct BFI counts"));
52 
54  "iterative-bfi-max-iterations-per-block", cl::init(1000), cl::Hidden,
55  cl::desc("Iterative inference: maximum number of update iterations "
56  "per block"));
57 
59  "iterative-bfi-precision", cl::init(1e-12), cl::Hidden,
60  cl::desc("Iterative inference: delta convergence precision; smaller values "
61  "typically lead to better results at the cost of worsen runtime"));
62 }
63 
65  if (isFull())
66  return ScaledNumber<uint64_t>(1, 0);
67  return ScaledNumber<uint64_t>(getMass() + 1, -64);
68 }
69 
70 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
72 #endif
73 
74 static char getHexDigit(int N) {
75  assert(N < 16);
76  if (N < 10)
77  return '0' + N;
78  return 'a' + N - 10;
79 }
80 
82  for (int Digits = 0; Digits < 16; ++Digits)
83  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
84  return OS;
85 }
86 
87 namespace {
88 
90 using Distribution = BlockFrequencyInfoImplBase::Distribution;
95 using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
96 
97 /// Dithering mass distributer.
98 ///
99 /// This class splits up a single mass into portions by weight, dithering to
100 /// spread out error. No mass is lost. The dithering precision depends on the
101 /// precision of the product of \a BlockMass and \a BranchProbability.
102 ///
103 /// The distribution algorithm follows.
104 ///
105 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
106 /// mass to distribute in \a RemMass.
107 ///
108 /// 2. For each portion:
109 ///
110 /// 1. Construct a branch probability, P, as the portion's weight divided
111 /// by the current value of \a RemWeight.
112 /// 2. Calculate the portion's mass as \a RemMass times P.
113 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
114 /// the current portion's weight and mass.
115 struct DitheringDistributer {
116  uint32_t RemWeight;
117  BlockMass RemMass;
118 
119  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
120 
121  BlockMass takeMass(uint32_t Weight);
122 };
123 
124 } // end anonymous namespace
125 
126 DitheringDistributer::DitheringDistributer(Distribution &Dist,
127  const BlockMass &Mass) {
128  Dist.normalize();
129  RemWeight = Dist.Total;
130  RemMass = Mass;
131 }
132 
133 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
134  assert(Weight && "invalid weight");
135  assert(Weight <= RemWeight);
136  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
137 
138  // Decrement totals (dither).
139  RemWeight -= Weight;
140  RemMass -= Mass;
141  return Mass;
142 }
143 
144 void Distribution::add(const BlockNode &Node, uint64_t Amount,
145  Weight::DistType Type) {
146  assert(Amount && "invalid weight of 0");
147  uint64_t NewTotal = Total + Amount;
148 
149  // Check for overflow. It should be impossible to overflow twice.
150  bool IsOverflow = NewTotal < Total;
151  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
152  DidOverflow |= IsOverflow;
153 
154  // Update the total.
155  Total = NewTotal;
156 
157  // Save the weight.
158  Weights.push_back(Weight(Type, Node, Amount));
159 }
160 
161 static void combineWeight(Weight &W, const Weight &OtherW) {
162  assert(OtherW.TargetNode.isValid());
163  if (!W.Amount) {
164  W = OtherW;
165  return;
166  }
167  assert(W.Type == OtherW.Type);
168  assert(W.TargetNode == OtherW.TargetNode);
169  assert(OtherW.Amount && "Expected non-zero weight");
170  if (W.Amount > W.Amount + OtherW.Amount)
171  // Saturate on overflow.
172  W.Amount = UINT64_MAX;
173  else
174  W.Amount += OtherW.Amount;
175 }
176 
177 static void combineWeightsBySorting(WeightList &Weights) {
178  // Sort so edges to the same node are adjacent.
179  llvm::sort(Weights, [](const Weight &L, const Weight &R) {
180  return L.TargetNode < R.TargetNode;
181  });
182 
183  // Combine adjacent edges.
184  WeightList::iterator O = Weights.begin();
185  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
186  ++O, (I = L)) {
187  *O = *I;
188 
189  // Find the adjacent weights to the same node.
190  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
191  combineWeight(*O, *L);
192  }
193 
194  // Erase extra entries.
195  Weights.erase(O, Weights.end());
196 }
197 
198 static void combineWeightsByHashing(WeightList &Weights) {
199  // Collect weights into a DenseMap.
200  using HashTable = DenseMap<BlockNode::IndexType, Weight>;
201 
202  HashTable Combined(NextPowerOf2(2 * Weights.size()));
203  for (const Weight &W : Weights)
204  combineWeight(Combined[W.TargetNode.Index], W);
205 
206  // Check whether anything changed.
207  if (Weights.size() == Combined.size())
208  return;
209 
210  // Fill in the new weights.
211  Weights.clear();
212  Weights.reserve(Combined.size());
213  for (const auto &I : Combined)
214  Weights.push_back(I.second);
215 }
216 
217 static void combineWeights(WeightList &Weights) {
218  // Use a hash table for many successors to keep this linear.
219  if (Weights.size() > 128) {
220  combineWeightsByHashing(Weights);
221  return;
222  }
223 
224  combineWeightsBySorting(Weights);
225 }
226 
228  assert(Shift >= 0);
229  assert(Shift < 64);
230  if (!Shift)
231  return N;
232  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
233 }
234 
235 void Distribution::normalize() {
236  // Early exit for termination nodes.
237  if (Weights.empty())
238  return;
239 
240  // Only bother if there are multiple successors.
241  if (Weights.size() > 1)
242  combineWeights(Weights);
243 
244  // Early exit when combined into a single successor.
245  if (Weights.size() == 1) {
246  Total = 1;
247  Weights.front().Amount = 1;
248  return;
249  }
250 
251  // Determine how much to shift right so that the total fits into 32-bits.
252  //
253  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
254  // for each weight can cause a 32-bit overflow.
255  int Shift = 0;
256  if (DidOverflow)
257  Shift = 33;
258  else if (Total > UINT32_MAX)
259  Shift = 33 - countLeadingZeros(Total);
260 
261  // Early exit if nothing needs to be scaled.
262  if (!Shift) {
263  // If we didn't overflow then combineWeights() shouldn't have changed the
264  // sum of the weights, but let's double-check.
265  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
266  [](uint64_t Sum, const Weight &W) {
267  return Sum + W.Amount;
268  }) &&
269  "Expected total to be correct");
270  return;
271  }
272 
273  // Recompute the total through accumulation (rather than shifting it) so that
274  // it's accurate after shifting and any changes combineWeights() made above.
275  Total = 0;
276 
277  // Sum the weights to each node and shift right if necessary.
278  for (Weight &W : Weights) {
279  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
280  // can round here without concern about overflow.
281  assert(W.TargetNode.isValid());
282  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
283  assert(W.Amount <= UINT32_MAX);
284 
285  // Update the total.
286  Total += W.Amount;
287  }
288  assert(Total <= UINT32_MAX);
289 }
290 
292  // Swap with a default-constructed std::vector, since std::vector<>::clear()
293  // does not actually clear heap storage.
294  std::vector<FrequencyData>().swap(Freqs);
295  IsIrrLoopHeader.clear();
296  std::vector<WorkingData>().swap(Working);
297  Loops.clear();
298 }
299 
300 /// Clear all memory not needed downstream.
301 ///
302 /// Releases all memory not used downstream. In particular, saves Freqs.
304  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
305  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
306  BFI.clear();
307  BFI.Freqs = std::move(SavedFreqs);
308  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
309 }
310 
312  const LoopData *OuterLoop,
313  const BlockNode &Pred,
314  const BlockNode &Succ,
315  uint64_t Weight) {
316  if (!Weight)
317  Weight = 1;
318 
319  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
320  return OuterLoop && OuterLoop->isHeader(Node);
321  };
322 
323  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
324 
325 #ifndef NDEBUG
326  auto debugSuccessor = [&](const char *Type) {
327  dbgs() << " =>"
328  << " [" << Type << "] weight = " << Weight;
329  if (!isLoopHeader(Resolved))
330  dbgs() << ", succ = " << getBlockName(Succ);
331  if (Resolved != Succ)
332  dbgs() << ", resolved = " << getBlockName(Resolved);
333  dbgs() << "\n";
334  };
335  (void)debugSuccessor;
336 #endif
337 
338  if (isLoopHeader(Resolved)) {
339  LLVM_DEBUG(debugSuccessor("backedge"));
340  Dist.addBackedge(Resolved, Weight);
341  return true;
342  }
343 
344  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
345  LLVM_DEBUG(debugSuccessor(" exit "));
346  Dist.addExit(Resolved, Weight);
347  return true;
348  }
349 
350  if (Resolved < Pred) {
351  if (!isLoopHeader(Pred)) {
352  // If OuterLoop is an irreducible loop, we can't actually handle this.
353  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
354  "unhandled irreducible control flow");
355 
356  // Irreducible backedge. Abort.
357  LLVM_DEBUG(debugSuccessor("abort!!!"));
358  return false;
359  }
360 
361  // If "Pred" is a loop header, then this isn't really a backedge; rather,
362  // OuterLoop must be irreducible. These false backedges can come only from
363  // secondary loop headers.
364  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
365  "unhandled irreducible control flow");
366  }
367 
368  LLVM_DEBUG(debugSuccessor(" local "));
369  Dist.addLocal(Resolved, Weight);
370  return true;
371 }
372 
374  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
375  // Copy the exit map into Dist.
376  for (const auto &I : Loop.Exits)
377  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
378  I.second.getMass()))
379  // Irreducible backedge.
380  return false;
381 
382  return true;
383 }
384 
385 /// Compute the loop scale for a loop.
387  // Compute loop scale.
388  LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
389 
390  // Infinite loops need special handling. If we give the back edge an infinite
391  // mass, they may saturate all the other scales in the function down to 1,
392  // making all the other region temperatures look exactly the same. Choose an
393  // arbitrary scale to avoid these issues.
394  //
395  // FIXME: An alternate way would be to select a symbolic scale which is later
396  // replaced to be the maximum of all computed scales plus 1. This would
397  // appropriately describe the loop as having a large scale, without skewing
398  // the final frequency computation.
399  const Scaled64 InfiniteLoopScale(1, 12);
400 
401  // LoopScale == 1 / ExitMass
402  // ExitMass == HeadMass - BackedgeMass
403  BlockMass TotalBackedgeMass;
404  for (auto &Mass : Loop.BackedgeMass)
405  TotalBackedgeMass += Mass;
406  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
407 
408  // Block scale stores the inverse of the scale. If this is an infinite loop,
409  // its exit mass will be zero. In this case, use an arbitrary scale for the
410  // loop scale.
411  Loop.Scale =
412  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
413 
414  LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
415  << BlockMass::getFull() << " - " << TotalBackedgeMass
416  << ")\n"
417  << " - scale = " << Loop.Scale << "\n");
418 }
419 
420 /// Package up a loop.
422  LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
423 
424  // Clear the subloop exits to prevent quadratic memory usage.
425  for (const BlockNode &M : Loop.Nodes) {
426  if (auto *Loop = Working[M.Index].getPackagedLoop())
427  Loop->Exits.clear();
428  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
429  }
430  Loop.IsPackaged = true;
431 }
432 
433 #ifndef NDEBUG
435  const DitheringDistributer &D, const BlockNode &T,
436  const BlockMass &M, const char *Desc) {
437  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
438  if (Desc)
439  dbgs() << " [" << Desc << "]";
440  if (T.isValid())
441  dbgs() << " to " << BFI.getBlockName(T);
442  dbgs() << "\n";
443 }
444 #endif
445 
447  LoopData *OuterLoop,
448  Distribution &Dist) {
449  BlockMass Mass = Working[Source.Index].getMass();
450  LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
451 
452  // Distribute mass to successors as laid out in Dist.
453  DitheringDistributer D(Dist, Mass);
454 
455  for (const Weight &W : Dist.Weights) {
456  // Check for a local edge (non-backedge and non-exit).
457  BlockMass Taken = D.takeMass(W.Amount);
458  if (W.Type == Weight::Local) {
459  Working[W.TargetNode.Index].getMass() += Taken;
460  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
461  continue;
462  }
463 
464  // Backedges and exits only make sense if we're processing a loop.
465  assert(OuterLoop && "backedge or exit outside of loop");
466 
467  // Check for a backedge.
468  if (W.Type == Weight::Backedge) {
469  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
470  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
471  continue;
472  }
473 
474  // This must be an exit.
475  assert(W.Type == Weight::Exit);
476  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
477  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
478  }
479 }
480 
482  const Scaled64 &Min, const Scaled64 &Max) {
483  // Scale the Factor to a size that creates integers. Ideally, integers would
484  // be scaled so that Max == UINT64_MAX so that they can be best
485  // differentiated. However, in the presence of large frequency values, small
486  // frequencies are scaled down to 1, making it impossible to differentiate
487  // small, unequal numbers. When the spread between Min and Max frequencies
488  // fits well within MaxBits, we make the scale be at least 8.
489  const unsigned MaxBits = 64;
490  const unsigned SpreadBits = (Max / Min).lg();
491  Scaled64 ScalingFactor;
492  if (SpreadBits <= MaxBits - 3) {
493  // If the values are small enough, make the scaling factor at least 8 to
494  // allow distinguishing small values.
495  ScalingFactor = Min.inverse();
496  ScalingFactor <<= 3;
497  } else {
498  // If the values need more than MaxBits to be represented, saturate small
499  // frequency values down to 1 by using a scaling factor that benefits large
500  // frequency values.
501  ScalingFactor = Scaled64(1, MaxBits) / Max;
502  }
503 
504  // Translate the floats to integers.
505  LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
506  << ", factor = " << ScalingFactor << "\n");
507  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
508  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
509  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
510  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
511  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
512  << ", int = " << BFI.Freqs[Index].Integer << "\n");
513  }
514 }
515 
516 /// Unwrap a loop package.
517 ///
518 /// Visits all the members of a loop, adjusting their BlockData according to
519 /// the loop's pseudo-node.
520 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
521  LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
522  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
523  << "\n");
524  Loop.Scale *= Loop.Mass.toScaled();
525  Loop.IsPackaged = false;
526  LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
527 
528  // Propagate the head scale through the loop. Since members are visited in
529  // RPO, the head scale will be updated by the loop scale first, and then the
530  // final head scale will be used for updated the rest of the members.
531  for (const BlockNode &N : Loop.Nodes) {
532  const auto &Working = BFI.Working[N.Index];
533  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
534  : BFI.Freqs[N.Index].Scaled;
535  Scaled64 New = Loop.Scale * F;
536  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
537  << New << "\n");
538  F = New;
539  }
540 }
541 
543  // Set initial frequencies from loop-local masses.
544  for (size_t Index = 0; Index < Working.size(); ++Index)
545  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
546 
547  for (LoopData &Loop : Loops)
548  unwrapLoop(*this, Loop);
549 }
550 
552  // Unwrap loop packages in reverse post-order, tracking min and max
553  // frequencies.
554  auto Min = Scaled64::getLargest();
555  auto Max = Scaled64::getZero();
556  for (size_t Index = 0; Index < Working.size(); ++Index) {
557  // Update min/max scale.
558  Min = std::min(Min, Freqs[Index].Scaled);
559  Max = std::max(Max, Freqs[Index].Scaled);
560  }
561 
562  // Convert to integers.
563  convertFloatingToInteger(*this, Min, Max);
564 
565  // Clean up data structures.
566  cleanup(*this);
567 
568  // Print out the final stats.
569  LLVM_DEBUG(dump());
570 }
571 
574  if (!Node.isValid()) {
575 #ifndef NDEBUG
579  OS << "*** Detected BFI query for unknown block " << getBlockName(Node);
580  report_fatal_error(OS.str());
581  }
582 #endif
583  return 0;
584  }
585  return Freqs[Node.Index].Integer;
586 }
587 
590  const BlockNode &Node,
591  bool AllowSynthetic) const {
592  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
593  AllowSynthetic);
594 }
595 
598  uint64_t Freq,
599  bool AllowSynthetic) const {
600  auto EntryCount = F.getEntryCount(AllowSynthetic);
601  if (!EntryCount)
602  return None;
603  // Use 128 bit APInt to do the arithmetic to avoid overflow.
604  APInt BlockCount(128, EntryCount->getCount());
605  APInt BlockFreq(128, Freq);
606  APInt EntryFreq(128, getEntryFreq());
607  BlockCount *= BlockFreq;
608  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
609  // lshr by 1 gives EntryFreq/2.
610  BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
611  return BlockCount.getLimitedValue();
612 }
613 
614 bool
616  if (!Node.isValid())
617  return false;
618  return IsIrrLoopHeader.test(Node.Index);
619 }
620 
621 Scaled64
623  if (!Node.isValid())
624  return Scaled64::getZero();
625  return Freqs[Node.Index].Scaled;
626 }
627 
629  uint64_t Freq) {
630  assert(Node.isValid() && "Expected valid node");
631  assert(Node.Index < Freqs.size() && "Expected legal index");
632  Freqs[Node.Index].Integer = Freq;
633 }
634 
635 std::string
637  return {};
638 }
639 
640 std::string
642  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
643 }
644 
645 raw_ostream &
647  const BlockNode &Node) const {
648  return OS << getFloatingBlockFreq(Node);
649 }
650 
651 raw_ostream &
653  const BlockFrequency &Freq) const {
654  Scaled64 Block(Freq.getFrequency(), 0);
655  Scaled64 Entry(getEntryFreq(), 0);
656 
657  return OS << Block / Entry;
658 }
659 
661  Start = OuterLoop.getHeader();
662  Nodes.reserve(OuterLoop.Nodes.size());
663  for (auto N : OuterLoop.Nodes)
664  addNode(N);
665  indexNodes();
666 }
667 
669  Start = 0;
670  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
671  if (!BFI.Working[Index].isPackaged())
672  addNode(Index);
673  indexNodes();
674 }
675 
677  for (auto &I : Nodes)
678  Lookup[I.Node.Index] = &I;
679 }
680 
682  const BFIBase::LoopData *OuterLoop) {
683  if (OuterLoop && OuterLoop->isHeader(Succ))
684  return;
685  auto L = Lookup.find(Succ.Index);
686  if (L == Lookup.end())
687  return;
688  IrrNode &SuccIrr = *L->second;
689  Irr.Edges.push_back(&SuccIrr);
690  SuccIrr.Edges.push_front(&Irr);
691  ++SuccIrr.NumIn;
692 }
693 
694 namespace llvm {
695 
696 template <> struct GraphTraits<IrreducibleGraph> {
698  using NodeRef = const GraphT::IrrNode *;
699  using ChildIteratorType = GraphT::IrrNode::iterator;
700 
701  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
702  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
703  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
704 };
705 
706 } // end namespace llvm
707 
708 /// Find extra irreducible headers.
709 ///
710 /// Find entry blocks and other blocks with backedges, which exist when \c G
711 /// contains irreducible sub-SCCs.
714  const IrreducibleGraph &G,
715  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
716  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
717  // Map from nodes in the SCC to whether it's an entry block.
719 
720  // InSCC also acts the set of nodes in the graph. Seed it.
721  for (const auto *I : SCC)
722  InSCC[I] = false;
723 
724  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
725  auto &Irr = *I->first;
726  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
727  if (InSCC.count(P))
728  continue;
729 
730  // This is an entry block.
731  I->second = true;
732  Headers.push_back(Irr.Node);
733  LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
734  << "\n");
735  break;
736  }
737  }
738  assert(Headers.size() >= 2 &&
739  "Expected irreducible CFG; -loop-info is likely invalid");
740  if (Headers.size() == InSCC.size()) {
741  // Every block is a header.
742  llvm::sort(Headers);
743  return;
744  }
745 
746  // Look for extra headers from irreducible sub-SCCs.
747  for (const auto &I : InSCC) {
748  // Entry blocks are already headers.
749  if (I.second)
750  continue;
751 
752  auto &Irr = *I.first;
753  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
754  // Skip forward edges.
755  if (P->Node < Irr.Node)
756  continue;
757 
758  // Skip predecessors from entry blocks. These can have inverted
759  // ordering.
760  if (InSCC.lookup(P))
761  continue;
762 
763  // Store the extra header.
764  Headers.push_back(Irr.Node);
765  LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
766  << "\n");
767  break;
768  }
769  if (Headers.back() == Irr.Node)
770  // Added this as a header.
771  continue;
772 
773  // This is not a header.
774  Others.push_back(Irr.Node);
775  LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
776  }
777  llvm::sort(Headers);
778  llvm::sort(Others);
779 }
780 
783  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
784  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
785  // Translate the SCC into RPO.
786  LLVM_DEBUG(dbgs() << " - found-scc\n");
787 
788  LoopData::NodeList Headers;
789  LoopData::NodeList Others;
790  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
791 
792  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
793  Headers.end(), Others.begin(), Others.end());
794 
795  // Update loop hierarchy.
796  for (const auto &N : Loop->Nodes)
797  if (BFI.Working[N.Index].isLoopHeader())
798  BFI.Working[N.Index].Loop->Parent = &*Loop;
799  else
800  BFI.Working[N.Index].Loop = &*Loop;
801 }
802 
805  const IrreducibleGraph &G, LoopData *OuterLoop,
806  std::list<LoopData>::iterator Insert) {
807  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
808  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
809 
810  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
811  if (I->size() < 2)
812  continue;
813 
814  // Translate the SCC into RPO.
815  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
816  }
817 
818  if (OuterLoop)
819  return make_range(std::next(Prev), Insert);
820  return make_range(Loops.begin(), Insert);
821 }
822 
823 void
825  OuterLoop.Exits.clear();
826  for (auto &Mass : OuterLoop.BackedgeMass)
827  Mass = BlockMass::getEmpty();
828  auto O = OuterLoop.Nodes.begin() + 1;
829  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
830  if (!Working[I->Index].isPackaged())
831  *O++ = *I;
832  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
833 }
834 
836  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
837 
838  // Since the loop has more than one header block, the mass flowing back into
839  // each header will be different. Adjust the mass in each header loop to
840  // reflect the masses flowing through back edges.
841  //
842  // To do this, we distribute the initial mass using the backedge masses
843  // as weights for the distribution.
844  BlockMass LoopMass = BlockMass::getFull();
845  Distribution Dist;
846 
847  LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
848  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
849  auto &HeaderNode = Loop.Nodes[H];
850  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
851  LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
852  << getBlockName(HeaderNode) << ": " << BackedgeMass
853  << "\n");
854  if (BackedgeMass.getMass() > 0)
855  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
856  else
857  LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
858  }
859 
860  DitheringDistributer D(Dist, LoopMass);
861 
862  LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
863  << " to headers using above weights\n");
864  for (const Weight &W : Dist.Weights) {
865  BlockMass Taken = D.takeMass(W.Amount);
866  assert(W.Type == Weight::Local && "all weights should be local");
867  Working[W.TargetNode.Index].getMass() = Taken;
868  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
869  }
870 }
871 
873  BlockMass LoopMass = BlockMass::getFull();
874  DitheringDistributer D(Dist, LoopMass);
875  for (const Weight &W : Dist.Weights) {
876  BlockMass Taken = D.takeMass(W.Amount);
877  assert(W.Type == Weight::Local && "all weights should be local");
878  Working[W.TargetNode.Index].getMass() = Taken;
879  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
880  }
881 }
llvm::BlockFrequencyInfoImplBase::getLoopName
std::string getLoopName(const LoopData &Loop) const
Definition: BlockFrequencyInfoImpl.cpp:641
combineWeightsByHashing
static void combineWeightsByHashing(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:198
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
MathExtras.h
llvm::bfi_detail::BlockMass
Mass of a block.
Definition: BlockFrequencyInfoImpl.h:91
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass
void distributeIrrLoopHeaderMass(Distribution &Dist)
Definition: BlockFrequencyInfoImpl.cpp:872
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::BlockFrequencyInfoImplBase::computeLoopScale
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
Definition: BlockFrequencyInfoImpl.cpp:386
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
SCCIterator.h
unwrapLoop
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
Definition: BlockFrequencyInfoImpl.cpp:520
T
llvm::bfi_detail::IrreducibleGraph::addNodesInLoop
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:660
llvm::bfi_detail::IrreducibleGraph
Graph of irreducible control flow.
Definition: BlockFrequencyInfoImpl.h:602
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
combineWeights
static void combineWeights(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:217
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::GraphTraits< IrreducibleGraph >::getEntryNode
static NodeRef getEntryNode(const GraphT &G)
Definition: BlockFrequencyInfoImpl.cpp:701
llvm::BlockFrequencyInfoImplBase::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:589
llvm::BlockFrequencyInfoImplBase::FrequencyData
Stats about a block itself.
Definition: BlockFrequencyInfoImpl.h:214
llvm::GraphTraits< IrreducibleGraph >::child_end
static ChildIteratorType child_end(NodeRef N)
Definition: BlockFrequencyInfoImpl.cpp:703
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::ScaledNumber::inverse
ScaledNumber inverse() const
Definition: ScaledNumber.h:677
cleanup
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
Definition: BlockFrequencyInfoImpl.cpp:303
APInt.h
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:75
Shift
bool Shift
Definition: README.txt:468
BlockFrequency.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::bfi_detail::IrreducibleGraph::addNode
void addNode(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.h:648
llvm::rdf::NodeList
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:513
llvm::Optional< uint64_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::bfi_detail::BlockMass::isFull
bool isFull() const
Definition: BlockFrequencyInfoImpl.h:106
llvm::BlockFrequencyInfoImplBase::Distribution::addLocal
void addLocal(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:393
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::BlockFrequencyInfoImplBase::clear
void clear()
Clear all memory.
Definition: BlockFrequencyInfoImpl.cpp:291
llvm::APInt::lshr
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:832
llvm::BlockFrequencyInfoImplBase::LoopData::getHeader
BlockNode getHeader() const
Definition: BlockFrequencyInfoImpl.h:256
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BlockFrequencyInfoImplBase::packageLoop
void packageLoop(LoopData &Loop)
Package up a loop.
Definition: BlockFrequencyInfoImpl.cpp:421
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GraphTraits< IrreducibleGraph >::ChildIteratorType
GraphT::IrrNode::iterator ChildIteratorType
Definition: BlockFrequencyInfoImpl.cpp:699
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::BlockFrequencyInfoImplBase::updateLoopWithIrreducible
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
Definition: BlockFrequencyInfoImpl.cpp:824
llvm::SparseBitVector
Definition: SparseBitVector.h:256
BlockFrequencyInfoImpl.h
llvm::BlockFrequencyInfoImplBase::finalizeMetrics
void finalizeMetrics()
Finalize frequency metrics.
Definition: BlockFrequencyInfoImpl.cpp:551
llvm::bfi_detail::BlockMass::dump
void dump() const
Definition: BlockFrequencyInfoImpl.cpp:71
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::bfi_detail::IrreducibleGraph::addEdge
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:681
llvm::BlockFrequencyInfoImplBase::getProfileCountFromFreq
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:597
llvm::bfi_detail::IrreducibleGraph::Start
BlockNode Start
Definition: BlockFrequencyInfoImpl.h:622
llvm::bfi_detail::BlockMass::toScaled
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
Definition: BlockFrequencyInfoImpl.cpp:64
llvm::BlockFrequencyInfoImplBase::getBlockFreq
BlockFrequency getBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:573
UINT64_MAX
#define UINT64_MAX
Definition: DataTypes.h:77
llvm::APInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:456
llvm::BlockFrequencyInfoImplBase::LoopData::isHeader
bool isHeader(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.h:249
llvm::NextPowerOf2
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:612
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::bfi_detail::IrreducibleGraph::BFI
BFIBase & BFI
Definition: BlockFrequencyInfoImpl.h:605
llvm::BlockFrequencyInfoImplBase::distributeMass
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
Definition: BlockFrequencyInfoImpl.cpp:446
llvm::bfi_detail::BlockMass::getFull
static BlockMass getFull()
Definition: BlockFrequencyInfoImpl.h:100
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::bfi_detail
Definition: BlockFrequencyInfoImpl.h:70
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
createIrreducibleLoop
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
Definition: BlockFrequencyInfoImpl.cpp:781
combineWeightsBySorting
static void combineWeightsBySorting(WeightList &Weights)
Definition: BlockFrequencyInfoImpl.cpp:177
llvm::None
const NoneType None
Definition: None.h:24
llvm::BlockFrequencyInfoImplBase::Distribution
Distribution of unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:384
BranchProbability.h
llvm::SmallString< 256 >
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1538
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::cl::opt< bool >
llvm::BlockFrequencyInfoImplBase::printBlockFreq
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:646
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::BlockFrequencyInfoImplBase::LoopData::isIrreducible
bool isIrreducible() const
Definition: BlockFrequencyInfoImpl.h:257
llvm::BlockFrequencyInfoImplBase::Distribution::WeightList
SmallVector< Weight, 4 > WeightList
Definition: BlockFrequencyInfoImpl.h:385
llvm::BlockFrequency
Definition: BlockFrequency.h:23
llvm::BlockFrequencyInfoImplBase::Distribution::addBackedge
void addBackedge(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:401
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
combineWeight
static void combineWeight(Weight &W, const Weight &OtherW)
Definition: BlockFrequencyInfoImpl.cpp:161
Scaled
@ Scaled
Definition: ARCInstrInfo.cpp:35
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition: BlockFrequency.h:34
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
llvm::bfi_detail::IrreducibleGraph::IrrNode
Definition: BlockFrequencyInfoImpl.h:608
llvm::IterativeBFIPrecision
llvm::cl::opt< double > IterativeBFIPrecision
llvm::BlockFrequencyInfoImplBase::Working
std::vector< WorkingData > Working
Loop data: see initializeLoops().
Definition: BlockFrequencyInfoImpl.h:428
llvm::GraphTraits< IrreducibleGraph >::child_begin
static ChildIteratorType child_begin(NodeRef N)
Definition: BlockFrequencyInfoImpl.cpp:702
debugAssign
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
Definition: BlockFrequencyInfoImpl.cpp:434
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::BlockFrequencyInfoImplBase::addLoopSuccessorsToDist
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:373
llvm::bfi_detail::IrreducibleGraph::addNodesInFunction
void addNodesInFunction()
Definition: BlockFrequencyInfoImpl.cpp:668
llvm::BlockFrequencyInfoImplBase::unwrapLoops
void unwrapLoops()
Unwrap loops.
Definition: BlockFrequencyInfoImpl.cpp:542
llvm::bfi_detail::BlockMass::getEmpty
static BlockMass getEmpty()
Definition: BlockFrequencyInfoImpl.h:98
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:307
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::CheckBFIUnknownBlockQueries
llvm::cl::opt< bool > CheckBFIUnknownBlockQueries
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
findIrreducibleHeaders
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Definition: BlockFrequencyInfoImpl.cpp:712
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:683
llvm::BlockFrequencyInfoImplBase::BlockNode::Index
IndexType Index
Definition: BlockFrequencyInfoImpl.h:194
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::bfi_detail::BlockMass::isEmpty
bool isEmpty() const
Definition: BlockFrequencyInfoImpl.h:107
Compiler.h
llvm::BlockFrequencyInfoImplBase
Base class for BlockFrequencyInfoImpl.
Definition: BlockFrequencyInfoImpl.h:179
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::BlockFrequencyInfoImplBase::LoopData::Exits
ExitMap Exits
Successor edges (and weights).
Definition: BlockFrequencyInfoImpl.h:231
ScaledNumber.h
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::BlockFrequencyInfoImplBase::Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: BlockFrequencyInfoImpl.h:181
llvm::ScaledNumber< uint64_t >
llvm::bfi_detail::IrreducibleGraph::IrrNode::NumIn
unsigned NumIn
Definition: BlockFrequencyInfoImpl.h:610
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::BlockFrequencyInfoImplBase::adjustLoopHeaderMass
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
Definition: BlockFrequencyInfoImpl.cpp:835
llvm::BlockFrequencyInfoImplBase::getFloatingBlockFreq
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:622
Function.h
llvm::BlockFrequencyInfoImplBase::Distribution::Weights
WeightList Weights
Individual successor weights.
Definition: BlockFrequencyInfoImpl.h:387
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:99
llvm::BlockFrequencyInfoImplBase::setBlockFreq
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
Definition: BlockFrequencyInfoImpl.cpp:628
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:37
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:597
llvm::BlockFrequencyInfoImplBase::Weight
Unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:365
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:221
llvm::BlockFrequencyInfoImplBase::LoopData::Nodes
NodeList Nodes
Header and the members of the loop.
Definition: BlockFrequencyInfoImpl.h:232
convertFloatingToInteger
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
Definition: BlockFrequencyInfoImpl.cpp:481
llvm::bfi_detail::BlockMass::print
raw_ostream & print(raw_ostream &OS) const
Definition: BlockFrequencyInfoImpl.cpp:81
shiftRightAndRound
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
Definition: BlockFrequencyInfoImpl.cpp:227
llvm::bfi_detail::BlockMass::getMass
uint64_t getMass() const
Definition: BlockFrequencyInfoImpl.h:104
llvm::BlockFrequencyInfoImplBase::LoopData::BackedgeMass
HeaderMassList BackedgeMass
Mass returned to each loop header.
Definition: BlockFrequencyInfoImpl.h:233
llvm::IterativeBFIMaxIterationsPerBlock
llvm::cl::opt< unsigned > IterativeBFIMaxIterationsPerBlock
N
#define N
llvm::bfi_detail::getBlockName
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Definition: BlockFrequencyInfoImpl.h:575
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::BlockFrequencyInfoImplBase::getBlockName
virtual std::string getBlockName(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:636
llvm::BlockFrequencyInfoImplBase::LoopData::getHeaderIndex
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
Definition: BlockFrequencyInfoImpl.h:259
llvm::BlockFrequencyInfoImplBase::isIrrLoopHeader
bool isIrrLoopHeader(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.cpp:615
llvm::GraphTraits
Definition: GraphTraits.h:37
llvm::BlockFrequencyInfoImplBase::LoopData
Data about a loop.
Definition: BlockFrequencyInfoImpl.h:223
llvm::bfi_detail::IrreducibleGraph::indexNodes
void indexNodes()
Definition: BlockFrequencyInfoImpl.cpp:676
llvm::bfi_detail::IrreducibleGraph::Nodes
std::vector< IrrNode > Nodes
Definition: BlockFrequencyInfoImpl.h:624
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::bfi_detail::IrreducibleGraph::Lookup
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
Definition: BlockFrequencyInfoImpl.h:625
llvm::BlockFrequencyInfoImplBase::Distribution::addExit
void addExit(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:397
llvm::BlockFrequencyInfoImplBase::BlockNode
Representative of a block.
Definition: BlockFrequencyInfoImpl.h:191
Debug.h
llvm::bfi_detail::IrreducibleGraph::IrrNode::Edges
std::deque< const IrrNode * > Edges
Definition: BlockFrequencyInfoImpl.h:611
llvm::BlockFrequencyInfoImplBase::addToDist
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:311
llvm::BlockFrequencyInfoImplBase::analyzeIrreducible
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
Definition: BlockFrequencyInfoImpl.cpp:804
getHexDigit
static char getHexDigit(int N)
Definition: BlockFrequencyInfoImpl.cpp:74