LLVM  6.0.0svn
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Loops should be simplified before this analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/SCCIterator.h"
20 #include "llvm/IR/Function.h"
23 #include "llvm/Support/Compiler.h"
24 #include "llvm/Support/Debug.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <list>
34 #include <numeric>
35 #include <utility>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::bfi_detail;
40 
41 #define DEBUG_TYPE "block-freq"
42 
44  if (isFull())
45  return ScaledNumber<uint64_t>(1, 0);
46  return ScaledNumber<uint64_t>(getMass() + 1, -64);
47 }
48 
49 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
51 #endif
52 
53 static char getHexDigit(int N) {
54  assert(N < 16);
55  if (N < 10)
56  return '0' + N;
57  return 'a' + N - 10;
58 }
59 
61  for (int Digits = 0; Digits < 16; ++Digits)
62  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
63  return OS;
64 }
65 
66 namespace {
67 
69 using Distribution = BlockFrequencyInfoImplBase::Distribution;
74 using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
75 
76 /// \brief Dithering mass distributer.
77 ///
78 /// This class splits up a single mass into portions by weight, dithering to
79 /// spread out error. No mass is lost. The dithering precision depends on the
80 /// precision of the product of \a BlockMass and \a BranchProbability.
81 ///
82 /// The distribution algorithm follows.
83 ///
84 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
85 /// mass to distribute in \a RemMass.
86 ///
87 /// 2. For each portion:
88 ///
89 /// 1. Construct a branch probability, P, as the portion's weight divided
90 /// by the current value of \a RemWeight.
91 /// 2. Calculate the portion's mass as \a RemMass times P.
92 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
93 /// the current portion's weight and mass.
94 struct DitheringDistributer {
95  uint32_t RemWeight;
96  BlockMass RemMass;
97 
98  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
99 
100  BlockMass takeMass(uint32_t Weight);
101 };
102 
103 } // end anonymous namespace
104 
105 DitheringDistributer::DitheringDistributer(Distribution &Dist,
106  const BlockMass &Mass) {
107  Dist.normalize();
108  RemWeight = Dist.Total;
109  RemMass = Mass;
110 }
111 
112 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
113  assert(Weight && "invalid weight");
114  assert(Weight <= RemWeight);
115  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
116 
117  // Decrement totals (dither).
118  RemWeight -= Weight;
119  RemMass -= Mass;
120  return Mass;
121 }
122 
123 void Distribution::add(const BlockNode &Node, uint64_t Amount,
124  Weight::DistType Type) {
125  assert(Amount && "invalid weight of 0");
126  uint64_t NewTotal = Total + Amount;
127 
128  // Check for overflow. It should be impossible to overflow twice.
129  bool IsOverflow = NewTotal < Total;
130  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
131  DidOverflow |= IsOverflow;
132 
133  // Update the total.
134  Total = NewTotal;
135 
136  // Save the weight.
137  Weights.push_back(Weight(Type, Node, Amount));
138 }
139 
140 static void combineWeight(Weight &W, const Weight &OtherW) {
141  assert(OtherW.TargetNode.isValid());
142  if (!W.Amount) {
143  W = OtherW;
144  return;
145  }
146  assert(W.Type == OtherW.Type);
147  assert(W.TargetNode == OtherW.TargetNode);
148  assert(OtherW.Amount && "Expected non-zero weight");
149  if (W.Amount > W.Amount + OtherW.Amount)
150  // Saturate on overflow.
151  W.Amount = UINT64_MAX;
152  else
153  W.Amount += OtherW.Amount;
154 }
155 
156 static void combineWeightsBySorting(WeightList &Weights) {
157  // Sort so edges to the same node are adjacent.
158  std::sort(Weights.begin(), Weights.end(),
159  [](const Weight &L,
160  const Weight &R) { return L.TargetNode < R.TargetNode; });
161 
162  // Combine adjacent edges.
163  WeightList::iterator O = Weights.begin();
164  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
165  ++O, (I = L)) {
166  *O = *I;
167 
168  // Find the adjacent weights to the same node.
169  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
170  combineWeight(*O, *L);
171  }
172 
173  // Erase extra entries.
174  Weights.erase(O, Weights.end());
175 }
176 
177 static void combineWeightsByHashing(WeightList &Weights) {
178  // Collect weights into a DenseMap.
179  using HashTable = DenseMap<BlockNode::IndexType, Weight>;
180 
181  HashTable Combined(NextPowerOf2(2 * Weights.size()));
182  for (const Weight &W : Weights)
183  combineWeight(Combined[W.TargetNode.Index], W);
184 
185  // Check whether anything changed.
186  if (Weights.size() == Combined.size())
187  return;
188 
189  // Fill in the new weights.
190  Weights.clear();
191  Weights.reserve(Combined.size());
192  for (const auto &I : Combined)
193  Weights.push_back(I.second);
194 }
195 
196 static void combineWeights(WeightList &Weights) {
197  // Use a hash table for many successors to keep this linear.
198  if (Weights.size() > 128) {
199  combineWeightsByHashing(Weights);
200  return;
201  }
202 
203  combineWeightsBySorting(Weights);
204 }
205 
206 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
207  assert(Shift >= 0);
208  assert(Shift < 64);
209  if (!Shift)
210  return N;
211  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
212 }
213 
214 void Distribution::normalize() {
215  // Early exit for termination nodes.
216  if (Weights.empty())
217  return;
218 
219  // Only bother if there are multiple successors.
220  if (Weights.size() > 1)
221  combineWeights(Weights);
222 
223  // Early exit when combined into a single successor.
224  if (Weights.size() == 1) {
225  Total = 1;
226  Weights.front().Amount = 1;
227  return;
228  }
229 
230  // Determine how much to shift right so that the total fits into 32-bits.
231  //
232  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
233  // for each weight can cause a 32-bit overflow.
234  int Shift = 0;
235  if (DidOverflow)
236  Shift = 33;
237  else if (Total > UINT32_MAX)
238  Shift = 33 - countLeadingZeros(Total);
239 
240  // Early exit if nothing needs to be scaled.
241  if (!Shift) {
242  // If we didn't overflow then combineWeights() shouldn't have changed the
243  // sum of the weights, but let's double-check.
244  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
245  [](uint64_t Sum, const Weight &W) {
246  return Sum + W.Amount;
247  }) &&
248  "Expected total to be correct");
249  return;
250  }
251 
252  // Recompute the total through accumulation (rather than shifting it) so that
253  // it's accurate after shifting and any changes combineWeights() made above.
254  Total = 0;
255 
256  // Sum the weights to each node and shift right if necessary.
257  for (Weight &W : Weights) {
258  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
259  // can round here without concern about overflow.
260  assert(W.TargetNode.isValid());
261  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
262  assert(W.Amount <= UINT32_MAX);
263 
264  // Update the total.
265  Total += W.Amount;
266  }
267  assert(Total <= UINT32_MAX);
268 }
269 
271  // Swap with a default-constructed std::vector, since std::vector<>::clear()
272  // does not actually clear heap storage.
273  std::vector<FrequencyData>().swap(Freqs);
274  IsIrrLoopHeader.clear();
275  std::vector<WorkingData>().swap(Working);
276  Loops.clear();
277 }
278 
279 /// \brief Clear all memory not needed downstream.
280 ///
281 /// Releases all memory not used downstream. In particular, saves Freqs.
283  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
284  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
285  BFI.clear();
286  BFI.Freqs = std::move(SavedFreqs);
287  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
288 }
289 
291  const LoopData *OuterLoop,
292  const BlockNode &Pred,
293  const BlockNode &Succ,
294  uint64_t Weight) {
295  if (!Weight)
296  Weight = 1;
297 
298  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
299  return OuterLoop && OuterLoop->isHeader(Node);
300  };
301 
302  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
303 
304 #ifndef NDEBUG
305  auto debugSuccessor = [&](const char *Type) {
306  dbgs() << " =>"
307  << " [" << Type << "] weight = " << Weight;
308  if (!isLoopHeader(Resolved))
309  dbgs() << ", succ = " << getBlockName(Succ);
310  if (Resolved != Succ)
311  dbgs() << ", resolved = " << getBlockName(Resolved);
312  dbgs() << "\n";
313  };
314  (void)debugSuccessor;
315 #endif
316 
317  if (isLoopHeader(Resolved)) {
318  DEBUG(debugSuccessor("backedge"));
319  Dist.addBackedge(Resolved, Weight);
320  return true;
321  }
322 
323  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
324  DEBUG(debugSuccessor(" exit "));
325  Dist.addExit(Resolved, Weight);
326  return true;
327  }
328 
329  if (Resolved < Pred) {
330  if (!isLoopHeader(Pred)) {
331  // If OuterLoop is an irreducible loop, we can't actually handle this.
332  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
333  "unhandled irreducible control flow");
334 
335  // Irreducible backedge. Abort.
336  DEBUG(debugSuccessor("abort!!!"));
337  return false;
338  }
339 
340  // If "Pred" is a loop header, then this isn't really a backedge; rather,
341  // OuterLoop must be irreducible. These false backedges can come only from
342  // secondary loop headers.
343  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
344  "unhandled irreducible control flow");
345  }
346 
347  DEBUG(debugSuccessor(" local "));
348  Dist.addLocal(Resolved, Weight);
349  return true;
350 }
351 
353  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
354  // Copy the exit map into Dist.
355  for (const auto &I : Loop.Exits)
356  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
357  I.second.getMass()))
358  // Irreducible backedge.
359  return false;
360 
361  return true;
362 }
363 
364 /// \brief Compute the loop scale for a loop.
366  // Compute loop scale.
367  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
368 
369  // Infinite loops need special handling. If we give the back edge an infinite
370  // mass, they may saturate all the other scales in the function down to 1,
371  // making all the other region temperatures look exactly the same. Choose an
372  // arbitrary scale to avoid these issues.
373  //
374  // FIXME: An alternate way would be to select a symbolic scale which is later
375  // replaced to be the maximum of all computed scales plus 1. This would
376  // appropriately describe the loop as having a large scale, without skewing
377  // the final frequency computation.
378  const Scaled64 InfiniteLoopScale(1, 12);
379 
380  // LoopScale == 1 / ExitMass
381  // ExitMass == HeadMass - BackedgeMass
382  BlockMass TotalBackedgeMass;
383  for (auto &Mass : Loop.BackedgeMass)
384  TotalBackedgeMass += Mass;
385  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
386 
387  // Block scale stores the inverse of the scale. If this is an infinite loop,
388  // its exit mass will be zero. In this case, use an arbitrary scale for the
389  // loop scale.
390  Loop.Scale =
391  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
392 
393  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
394  << " - " << TotalBackedgeMass << ")\n"
395  << " - scale = " << Loop.Scale << "\n");
396 }
397 
398 /// \brief Package up a loop.
400  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
401 
402  // Clear the subloop exits to prevent quadratic memory usage.
403  for (const BlockNode &M : Loop.Nodes) {
404  if (auto *Loop = Working[M.Index].getPackagedLoop())
405  Loop->Exits.clear();
406  DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
407  }
408  Loop.IsPackaged = true;
409 }
410 
411 #ifndef NDEBUG
413  const DitheringDistributer &D, const BlockNode &T,
414  const BlockMass &M, const char *Desc) {
415  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
416  if (Desc)
417  dbgs() << " [" << Desc << "]";
418  if (T.isValid())
419  dbgs() << " to " << BFI.getBlockName(T);
420  dbgs() << "\n";
421 }
422 #endif
423 
425  LoopData *OuterLoop,
426  Distribution &Dist) {
427  BlockMass Mass = Working[Source.Index].getMass();
428  DEBUG(dbgs() << " => mass: " << Mass << "\n");
429 
430  // Distribute mass to successors as laid out in Dist.
431  DitheringDistributer D(Dist, Mass);
432 
433  for (const Weight &W : Dist.Weights) {
434  // Check for a local edge (non-backedge and non-exit).
435  BlockMass Taken = D.takeMass(W.Amount);
436  if (W.Type == Weight::Local) {
437  Working[W.TargetNode.Index].getMass() += Taken;
438  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
439  continue;
440  }
441 
442  // Backedges and exits only make sense if we're processing a loop.
443  assert(OuterLoop && "backedge or exit outside of loop");
444 
445  // Check for a backedge.
446  if (W.Type == Weight::Backedge) {
447  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
448  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
449  continue;
450  }
451 
452  // This must be an exit.
453  assert(W.Type == Weight::Exit);
454  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
455  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
456  }
457 }
458 
460  const Scaled64 &Min, const Scaled64 &Max) {
461  // Scale the Factor to a size that creates integers. Ideally, integers would
462  // be scaled so that Max == UINT64_MAX so that they can be best
463  // differentiated. However, in the presence of large frequency values, small
464  // frequencies are scaled down to 1, making it impossible to differentiate
465  // small, unequal numbers. When the spread between Min and Max frequencies
466  // fits well within MaxBits, we make the scale be at least 8.
467  const unsigned MaxBits = 64;
468  const unsigned SpreadBits = (Max / Min).lg();
469  Scaled64 ScalingFactor;
470  if (SpreadBits <= MaxBits - 3) {
471  // If the values are small enough, make the scaling factor at least 8 to
472  // allow distinguishing small values.
473  ScalingFactor = Min.inverse();
474  ScalingFactor <<= 3;
475  } else {
476  // If the values need more than MaxBits to be represented, saturate small
477  // frequency values down to 1 by using a scaling factor that benefits large
478  // frequency values.
479  ScalingFactor = Scaled64(1, MaxBits) / Max;
480  }
481 
482  // Translate the floats to integers.
483  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
484  << ", factor = " << ScalingFactor << "\n");
485  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
486  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
487  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
488  DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
489  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
490  << ", int = " << BFI.Freqs[Index].Integer << "\n");
491  }
492 }
493 
494 /// \brief Unwrap a loop package.
495 ///
496 /// Visits all the members of a loop, adjusting their BlockData according to
497 /// the loop's pseudo-node.
498 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
499  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
500  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
501  << "\n");
502  Loop.Scale *= Loop.Mass.toScaled();
503  Loop.IsPackaged = false;
504  DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
505 
506  // Propagate the head scale through the loop. Since members are visited in
507  // RPO, the head scale will be updated by the loop scale first, and then the
508  // final head scale will be used for updated the rest of the members.
509  for (const BlockNode &N : Loop.Nodes) {
510  const auto &Working = BFI.Working[N.Index];
511  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
512  : BFI.Freqs[N.Index].Scaled;
513  Scaled64 New = Loop.Scale * F;
514  DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
515  << "\n");
516  F = New;
517  }
518 }
519 
521  // Set initial frequencies from loop-local masses.
522  for (size_t Index = 0; Index < Working.size(); ++Index)
523  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
524 
525  for (LoopData &Loop : Loops)
526  unwrapLoop(*this, Loop);
527 }
528 
530  // Unwrap loop packages in reverse post-order, tracking min and max
531  // frequencies.
532  auto Min = Scaled64::getLargest();
533  auto Max = Scaled64::getZero();
534  for (size_t Index = 0; Index < Working.size(); ++Index) {
535  // Update min/max scale.
536  Min = std::min(Min, Freqs[Index].Scaled);
537  Max = std::max(Max, Freqs[Index].Scaled);
538  }
539 
540  // Convert to integers.
541  convertFloatingToInteger(*this, Min, Max);
542 
543  // Clean up data structures.
544  cleanup(*this);
545 
546  // Print out the final stats.
547  DEBUG(dump());
548 }
549 
552  if (!Node.isValid())
553  return 0;
554  return Freqs[Node.Index].Integer;
555 }
556 
559  const BlockNode &Node) const {
560  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
561 }
562 
565  uint64_t Freq) const {
566  auto EntryCount = F.getEntryCount();
567  if (!EntryCount)
568  return None;
569  // Use 128 bit APInt to do the arithmetic to avoid overflow.
570  APInt BlockCount(128, EntryCount.getValue());
571  APInt BlockFreq(128, Freq);
572  APInt EntryFreq(128, getEntryFreq());
573  BlockCount *= BlockFreq;
574  BlockCount = BlockCount.udiv(EntryFreq);
575  return BlockCount.getLimitedValue();
576 }
577 
578 bool
580  if (!Node.isValid())
581  return false;
582  return IsIrrLoopHeader.test(Node.Index);
583 }
584 
585 Scaled64
587  if (!Node.isValid())
588  return Scaled64::getZero();
589  return Freqs[Node.Index].Scaled;
590 }
591 
593  uint64_t Freq) {
594  assert(Node.isValid() && "Expected valid node");
595  assert(Node.Index < Freqs.size() && "Expected legal index");
596  Freqs[Node.Index].Integer = Freq;
597 }
598 
599 std::string
601  return {};
602 }
603 
604 std::string
606  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
607 }
608 
609 raw_ostream &
611  const BlockNode &Node) const {
612  return OS << getFloatingBlockFreq(Node);
613 }
614 
615 raw_ostream &
617  const BlockFrequency &Freq) const {
618  Scaled64 Block(Freq.getFrequency(), 0);
619  Scaled64 Entry(getEntryFreq(), 0);
620 
621  return OS << Block / Entry;
622 }
623 
625  Start = OuterLoop.getHeader();
626  Nodes.reserve(OuterLoop.Nodes.size());
627  for (auto N : OuterLoop.Nodes)
628  addNode(N);
629  indexNodes();
630 }
631 
633  Start = 0;
634  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
635  if (!BFI.Working[Index].isPackaged())
636  addNode(Index);
637  indexNodes();
638 }
639 
641  for (auto &I : Nodes)
642  Lookup[I.Node.Index] = &I;
643 }
644 
646  const BFIBase::LoopData *OuterLoop) {
647  if (OuterLoop && OuterLoop->isHeader(Succ))
648  return;
649  auto L = Lookup.find(Succ.Index);
650  if (L == Lookup.end())
651  return;
652  IrrNode &SuccIrr = *L->second;
653  Irr.Edges.push_back(&SuccIrr);
654  SuccIrr.Edges.push_front(&Irr);
655  ++SuccIrr.NumIn;
656 }
657 
658 namespace llvm {
659 
660 template <> struct GraphTraits<IrreducibleGraph> {
662  using NodeRef = const GraphT::IrrNode *;
663  using ChildIteratorType = GraphT::IrrNode::iterator;
664 
665  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
667  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
668 };
669 
670 } // end namespace llvm
671 
672 /// \brief Find extra irreducible headers.
673 ///
674 /// Find entry blocks and other blocks with backedges, which exist when \c G
675 /// contains irreducible sub-SCCs.
678  const IrreducibleGraph &G,
679  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
680  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
681  // Map from nodes in the SCC to whether it's an entry block.
683 
684  // InSCC also acts the set of nodes in the graph. Seed it.
685  for (const auto *I : SCC)
686  InSCC[I] = false;
687 
688  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
689  auto &Irr = *I->first;
690  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
691  if (InSCC.count(P))
692  continue;
693 
694  // This is an entry block.
695  I->second = true;
696  Headers.push_back(Irr.Node);
697  DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
698  break;
699  }
700  }
701  assert(Headers.size() >= 2 &&
702  "Expected irreducible CFG; -loop-info is likely invalid");
703  if (Headers.size() == InSCC.size()) {
704  // Every block is a header.
705  std::sort(Headers.begin(), Headers.end());
706  return;
707  }
708 
709  // Look for extra headers from irreducible sub-SCCs.
710  for (const auto &I : InSCC) {
711  // Entry blocks are already headers.
712  if (I.second)
713  continue;
714 
715  auto &Irr = *I.first;
716  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
717  // Skip forward edges.
718  if (P->Node < Irr.Node)
719  continue;
720 
721  // Skip predecessors from entry blocks. These can have inverted
722  // ordering.
723  if (InSCC.lookup(P))
724  continue;
725 
726  // Store the extra header.
727  Headers.push_back(Irr.Node);
728  DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
729  break;
730  }
731  if (Headers.back() == Irr.Node)
732  // Added this as a header.
733  continue;
734 
735  // This is not a header.
736  Others.push_back(Irr.Node);
737  DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
738  }
739  std::sort(Headers.begin(), Headers.end());
740  std::sort(Others.begin(), Others.end());
741 }
742 
745  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
746  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
747  // Translate the SCC into RPO.
748  DEBUG(dbgs() << " - found-scc\n");
749 
750  LoopData::NodeList Headers;
751  LoopData::NodeList Others;
752  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
753 
754  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
755  Headers.end(), Others.begin(), Others.end());
756 
757  // Update loop hierarchy.
758  for (const auto &N : Loop->Nodes)
759  if (BFI.Working[N.Index].isLoopHeader())
760  BFI.Working[N.Index].Loop->Parent = &*Loop;
761  else
762  BFI.Working[N.Index].Loop = &*Loop;
763 }
764 
767  const IrreducibleGraph &G, LoopData *OuterLoop,
768  std::list<LoopData>::iterator Insert) {
769  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
770  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
771 
772  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
773  if (I->size() < 2)
774  continue;
775 
776  // Translate the SCC into RPO.
777  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
778  }
779 
780  if (OuterLoop)
781  return make_range(std::next(Prev), Insert);
782  return make_range(Loops.begin(), Insert);
783 }
784 
785 void
787  OuterLoop.Exits.clear();
788  for (auto &Mass : OuterLoop.BackedgeMass)
789  Mass = BlockMass::getEmpty();
790  auto O = OuterLoop.Nodes.begin() + 1;
791  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
792  if (!Working[I->Index].isPackaged())
793  *O++ = *I;
794  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
795 }
796 
798  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
799 
800  // Since the loop has more than one header block, the mass flowing back into
801  // each header will be different. Adjust the mass in each header loop to
802  // reflect the masses flowing through back edges.
803  //
804  // To do this, we distribute the initial mass using the backedge masses
805  // as weights for the distribution.
806  BlockMass LoopMass = BlockMass::getFull();
807  Distribution Dist;
808 
809  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
810  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
811  auto &HeaderNode = Loop.Nodes[H];
812  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
813  DEBUG(dbgs() << " - Add back edge mass for node "
814  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
815  if (BackedgeMass.getMass() > 0)
816  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
817  else
818  DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
819  }
820 
821  DitheringDistributer D(Dist, LoopMass);
822 
823  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
824  << " to headers using above weights\n");
825  for (const Weight &W : Dist.Weights) {
826  BlockMass Taken = D.takeMass(W.Amount);
827  assert(W.Type == Weight::Local && "all weights should be local");
828  Working[W.TargetNode.Index].getMass() = Taken;
829  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
830  }
831 }
832 
834  BlockMass LoopMass = BlockMass::getFull();
835  DitheringDistributer D(Dist, LoopMass);
836  for (const Weight &W : Dist.Weights) {
837  BlockMass Taken = D.takeMass(W.Amount);
838  assert(W.Type == Weight::Local && "all weights should be local");
839  Working[W.TargetNode.Index].getMass() = Taken;
840  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
841  }
842 }
const NoneType None
Definition: None.h:24
void push_back(const T &Elt)
Definition: SmallVector.h:212
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool IsPackaged
Whether this has been packaged.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
static char getHexDigit(int N)
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
raw_ostream & print(raw_ostream &OS) const
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
F(f)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:181
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
Hexagon Hardware Loops
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
virtual std::string getBlockName(const BlockNode &Node) const
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
This file implements a class to represent arbitrary precision integral constant values and operations...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:226
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode *> &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Graph of irreducible control flow.
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
std::string getLoopName(const LoopData &Loop) const
bool isHeader(const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
bool isIrrLoopHeader(const BlockNode &Node)
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode *> &SCC)
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
ScaledNumber inverse() const
Definition: ScaledNumber.h:678
void distributeIrrLoopHeaderMass(Distribution &Dist)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
#define H(x, y, z)
Definition: MD5.cpp:57
Distribution of unscaled probability weight.
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1330
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:632
iterator erase(const_iterator CI)
Definition: SmallVector.h:449
void addExit(const BlockNode &Node, uint64_t Amount)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
Definition: APInt.h:69
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:513
static void combineWeightsByHashing(WeightList &Weights)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
void packageLoop(LoopData &Loop)
Package up a loop.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
BlockFrequency getBlockFreq(const BlockNode &Node) const
Base class for BlockFrequencyInfoImpl.
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.