LLVM  9.0.0svn
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Loops should be simplified before this analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/SCCIterator.h"
19 #include "llvm/Config/llvm-config.h"
20 #include "llvm/IR/Function.h"
23 #include "llvm/Support/Compiler.h"
24 #include "llvm/Support/Debug.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <list>
34 #include <numeric>
35 #include <utility>
36 #include <vector>
37 
38 using namespace llvm;
39 using namespace llvm::bfi_detail;
40 
41 #define DEBUG_TYPE "block-freq"
42 
44  if (isFull())
45  return ScaledNumber<uint64_t>(1, 0);
46  return ScaledNumber<uint64_t>(getMass() + 1, -64);
47 }
48 
49 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
51 #endif
52 
/// Map a value in [0, 16) to its lowercase hexadecimal character.
static char getHexDigit(int N) {
  assert(N < 16);
  return N < 10 ? static_cast<char>('0' + N) : static_cast<char>('a' + (N - 10));
}
59 
61  for (int Digits = 0; Digits < 16; ++Digits)
62  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
63  return OS;
64 }
65 
66 namespace {
67 
69 using Distribution = BlockFrequencyInfoImplBase::Distribution;
74 using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
75 
76 /// Dithering mass distributer.
77 ///
78 /// This class splits up a single mass into portions by weight, dithering to
79 /// spread out error. No mass is lost. The dithering precision depends on the
80 /// precision of the product of \a BlockMass and \a BranchProbability.
81 ///
82 /// The distribution algorithm follows.
83 ///
84 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
85 /// mass to distribute in \a RemMass.
86 ///
87 /// 2. For each portion:
88 ///
89 /// 1. Construct a branch probability, P, as the portion's weight divided
90 /// by the current value of \a RemWeight.
91 /// 2. Calculate the portion's mass as \a RemMass times P.
92 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
93 /// the current portion's weight and mass.
94 struct DitheringDistributer {
95  uint32_t RemWeight;
96  BlockMass RemMass;
97 
98  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
99 
100  BlockMass takeMass(uint32_t Weight);
101 };
102 
103 } // end anonymous namespace
104 
105 DitheringDistributer::DitheringDistributer(Distribution &Dist,
106  const BlockMass &Mass) {
107  Dist.normalize();
108  RemWeight = Dist.Total;
109  RemMass = Mass;
110 }
111 
112 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
113  assert(Weight && "invalid weight");
114  assert(Weight <= RemWeight);
115  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
116 
117  // Decrement totals (dither).
118  RemWeight -= Weight;
119  RemMass -= Mass;
120  return Mass;
121 }
122 
123 void Distribution::add(const BlockNode &Node, uint64_t Amount,
124  Weight::DistType Type) {
125  assert(Amount && "invalid weight of 0");
126  uint64_t NewTotal = Total + Amount;
127 
128  // Check for overflow. It should be impossible to overflow twice.
129  bool IsOverflow = NewTotal < Total;
130  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
131  DidOverflow |= IsOverflow;
132 
133  // Update the total.
134  Total = NewTotal;
135 
136  // Save the weight.
137  Weights.push_back(Weight(Type, Node, Amount));
138 }
139 
140 static void combineWeight(Weight &W, const Weight &OtherW) {
141  assert(OtherW.TargetNode.isValid());
142  if (!W.Amount) {
143  W = OtherW;
144  return;
145  }
146  assert(W.Type == OtherW.Type);
147  assert(W.TargetNode == OtherW.TargetNode);
148  assert(OtherW.Amount && "Expected non-zero weight");
149  if (W.Amount > W.Amount + OtherW.Amount)
150  // Saturate on overflow.
151  W.Amount = UINT64_MAX;
152  else
153  W.Amount += OtherW.Amount;
154 }
155 
156 static void combineWeightsBySorting(WeightList &Weights) {
157  // Sort so edges to the same node are adjacent.
158  llvm::sort(Weights, [](const Weight &L, const Weight &R) {
159  return L.TargetNode < R.TargetNode;
160  });
161 
162  // Combine adjacent edges.
163  WeightList::iterator O = Weights.begin();
164  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
165  ++O, (I = L)) {
166  *O = *I;
167 
168  // Find the adjacent weights to the same node.
169  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
170  combineWeight(*O, *L);
171  }
172 
173  // Erase extra entries.
174  Weights.erase(O, Weights.end());
175 }
176 
177 static void combineWeightsByHashing(WeightList &Weights) {
178  // Collect weights into a DenseMap.
179  using HashTable = DenseMap<BlockNode::IndexType, Weight>;
180 
181  HashTable Combined(NextPowerOf2(2 * Weights.size()));
182  for (const Weight &W : Weights)
183  combineWeight(Combined[W.TargetNode.Index], W);
184 
185  // Check whether anything changed.
186  if (Weights.size() == Combined.size())
187  return;
188 
189  // Fill in the new weights.
190  Weights.clear();
191  Weights.reserve(Combined.size());
192  for (const auto &I : Combined)
193  Weights.push_back(I.second);
194 }
195 
196 static void combineWeights(WeightList &Weights) {
197  // Use a hash table for many successors to keep this linear.
198  if (Weights.size() > 128) {
199  combineWeightsByHashing(Weights);
200  return;
201  }
202 
203  combineWeightsBySorting(Weights);
204 }
205 
/// Shift N right by Shift bits, rounding to nearest (ties round up).
static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
  assert(Shift >= 0);
  assert(Shift < 64);
  if (Shift == 0)
    return N;
  // The bit just below the cut decides the rounding direction.
  const uint64_t RoundBit = (N >> (Shift - 1)) & UINT64_C(1);
  return (N >> Shift) + RoundBit;
}
213 
214 void Distribution::normalize() {
215  // Early exit for termination nodes.
216  if (Weights.empty())
217  return;
218 
219  // Only bother if there are multiple successors.
220  if (Weights.size() > 1)
221  combineWeights(Weights);
222 
223  // Early exit when combined into a single successor.
224  if (Weights.size() == 1) {
225  Total = 1;
226  Weights.front().Amount = 1;
227  return;
228  }
229 
230  // Determine how much to shift right so that the total fits into 32-bits.
231  //
232  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
233  // for each weight can cause a 32-bit overflow.
234  int Shift = 0;
235  if (DidOverflow)
236  Shift = 33;
237  else if (Total > UINT32_MAX)
238  Shift = 33 - countLeadingZeros(Total);
239 
240  // Early exit if nothing needs to be scaled.
241  if (!Shift) {
242  // If we didn't overflow then combineWeights() shouldn't have changed the
243  // sum of the weights, but let's double-check.
244  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
245  [](uint64_t Sum, const Weight &W) {
246  return Sum + W.Amount;
247  }) &&
248  "Expected total to be correct");
249  return;
250  }
251 
252  // Recompute the total through accumulation (rather than shifting it) so that
253  // it's accurate after shifting and any changes combineWeights() made above.
254  Total = 0;
255 
256  // Sum the weights to each node and shift right if necessary.
257  for (Weight &W : Weights) {
258  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
259  // can round here without concern about overflow.
260  assert(W.TargetNode.isValid());
261  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
262  assert(W.Amount <= UINT32_MAX);
263 
264  // Update the total.
265  Total += W.Amount;
266  }
267  assert(Total <= UINT32_MAX);
268 }
269 
271  // Swap with a default-constructed std::vector, since std::vector<>::clear()
272  // does not actually clear heap storage.
273  std::vector<FrequencyData>().swap(Freqs);
274  IsIrrLoopHeader.clear();
275  std::vector<WorkingData>().swap(Working);
276  Loops.clear();
277 }
278 
279 /// Clear all memory not needed downstream.
280 ///
281 /// Releases all memory not used downstream. In particular, saves Freqs.
283  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
284  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
285  BFI.clear();
286  BFI.Freqs = std::move(SavedFreqs);
287  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
288 }
289 
291  const LoopData *OuterLoop,
292  const BlockNode &Pred,
293  const BlockNode &Succ,
294  uint64_t Weight) {
295  if (!Weight)
296  Weight = 1;
297 
298  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
299  return OuterLoop && OuterLoop->isHeader(Node);
300  };
301 
302  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
303 
304 #ifndef NDEBUG
305  auto debugSuccessor = [&](const char *Type) {
306  dbgs() << " =>"
307  << " [" << Type << "] weight = " << Weight;
308  if (!isLoopHeader(Resolved))
309  dbgs() << ", succ = " << getBlockName(Succ);
310  if (Resolved != Succ)
311  dbgs() << ", resolved = " << getBlockName(Resolved);
312  dbgs() << "\n";
313  };
314  (void)debugSuccessor;
315 #endif
316 
317  if (isLoopHeader(Resolved)) {
318  LLVM_DEBUG(debugSuccessor("backedge"));
319  Dist.addBackedge(Resolved, Weight);
320  return true;
321  }
322 
323  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
324  LLVM_DEBUG(debugSuccessor(" exit "));
325  Dist.addExit(Resolved, Weight);
326  return true;
327  }
328 
329  if (Resolved < Pred) {
330  if (!isLoopHeader(Pred)) {
331  // If OuterLoop is an irreducible loop, we can't actually handle this.
332  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
333  "unhandled irreducible control flow");
334 
335  // Irreducible backedge. Abort.
336  LLVM_DEBUG(debugSuccessor("abort!!!"));
337  return false;
338  }
339 
340  // If "Pred" is a loop header, then this isn't really a backedge; rather,
341  // OuterLoop must be irreducible. These false backedges can come only from
342  // secondary loop headers.
343  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
344  "unhandled irreducible control flow");
345  }
346 
347  LLVM_DEBUG(debugSuccessor(" local "));
348  Dist.addLocal(Resolved, Weight);
349  return true;
350 }
351 
353  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
354  // Copy the exit map into Dist.
355  for (const auto &I : Loop.Exits)
356  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
357  I.second.getMass()))
358  // Irreducible backedge.
359  return false;
360 
361  return true;
362 }
363 
364 /// Compute the loop scale for a loop.
366  // Compute loop scale.
367  LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
368 
369  // Infinite loops need special handling. If we give the back edge an infinite
370  // mass, they may saturate all the other scales in the function down to 1,
371  // making all the other region temperatures look exactly the same. Choose an
372  // arbitrary scale to avoid these issues.
373  //
374  // FIXME: An alternate way would be to select a symbolic scale which is later
375  // replaced to be the maximum of all computed scales plus 1. This would
376  // appropriately describe the loop as having a large scale, without skewing
377  // the final frequency computation.
378  const Scaled64 InfiniteLoopScale(1, 12);
379 
380  // LoopScale == 1 / ExitMass
381  // ExitMass == HeadMass - BackedgeMass
382  BlockMass TotalBackedgeMass;
383  for (auto &Mass : Loop.BackedgeMass)
384  TotalBackedgeMass += Mass;
385  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
386 
387  // Block scale stores the inverse of the scale. If this is an infinite loop,
388  // its exit mass will be zero. In this case, use an arbitrary scale for the
389  // loop scale.
390  Loop.Scale =
391  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
392 
393  LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
394  << BlockMass::getFull() << " - " << TotalBackedgeMass
395  << ")\n"
396  << " - scale = " << Loop.Scale << "\n");
397 }
398 
399 /// Package up a loop.
401  LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
402 
403  // Clear the subloop exits to prevent quadratic memory usage.
404  for (const BlockNode &M : Loop.Nodes) {
405  if (auto *Loop = Working[M.Index].getPackagedLoop())
406  Loop->Exits.clear();
407  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
408  }
409  Loop.IsPackaged = true;
410 }
411 
412 #ifndef NDEBUG
414  const DitheringDistributer &D, const BlockNode &T,
415  const BlockMass &M, const char *Desc) {
416  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
417  if (Desc)
418  dbgs() << " [" << Desc << "]";
419  if (T.isValid())
420  dbgs() << " to " << BFI.getBlockName(T);
421  dbgs() << "\n";
422 }
423 #endif
424 
426  LoopData *OuterLoop,
427  Distribution &Dist) {
428  BlockMass Mass = Working[Source.Index].getMass();
429  LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
430 
431  // Distribute mass to successors as laid out in Dist.
432  DitheringDistributer D(Dist, Mass);
433 
434  for (const Weight &W : Dist.Weights) {
435  // Check for a local edge (non-backedge and non-exit).
436  BlockMass Taken = D.takeMass(W.Amount);
437  if (W.Type == Weight::Local) {
438  Working[W.TargetNode.Index].getMass() += Taken;
439  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
440  continue;
441  }
442 
443  // Backedges and exits only make sense if we're processing a loop.
444  assert(OuterLoop && "backedge or exit outside of loop");
445 
446  // Check for a backedge.
447  if (W.Type == Weight::Backedge) {
448  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
449  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
450  continue;
451  }
452 
453  // This must be an exit.
454  assert(W.Type == Weight::Exit);
455  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
456  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
457  }
458 }
459 
461  const Scaled64 &Min, const Scaled64 &Max) {
462  // Scale the Factor to a size that creates integers. Ideally, integers would
463  // be scaled so that Max == UINT64_MAX so that they can be best
464  // differentiated. However, in the presence of large frequency values, small
465  // frequencies are scaled down to 1, making it impossible to differentiate
466  // small, unequal numbers. When the spread between Min and Max frequencies
467  // fits well within MaxBits, we make the scale be at least 8.
468  const unsigned MaxBits = 64;
469  const unsigned SpreadBits = (Max / Min).lg();
470  Scaled64 ScalingFactor;
471  if (SpreadBits <= MaxBits - 3) {
472  // If the values are small enough, make the scaling factor at least 8 to
473  // allow distinguishing small values.
474  ScalingFactor = Min.inverse();
475  ScalingFactor <<= 3;
476  } else {
477  // If the values need more than MaxBits to be represented, saturate small
478  // frequency values down to 1 by using a scaling factor that benefits large
479  // frequency values.
480  ScalingFactor = Scaled64(1, MaxBits) / Max;
481  }
482 
483  // Translate the floats to integers.
484  LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
485  << ", factor = " << ScalingFactor << "\n");
486  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
487  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
488  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
489  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
490  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
491  << ", int = " << BFI.Freqs[Index].Integer << "\n");
492  }
493 }
494 
495 /// Unwrap a loop package.
496 ///
497 /// Visits all the members of a loop, adjusting their BlockData according to
498 /// the loop's pseudo-node.
499 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
500  LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
501  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
502  << "\n");
503  Loop.Scale *= Loop.Mass.toScaled();
504  Loop.IsPackaged = false;
505  LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
506 
507  // Propagate the head scale through the loop. Since members are visited in
508  // RPO, the head scale will be updated by the loop scale first, and then the
509  // final head scale will be used for updated the rest of the members.
510  for (const BlockNode &N : Loop.Nodes) {
511  const auto &Working = BFI.Working[N.Index];
512  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
513  : BFI.Freqs[N.Index].Scaled;
514  Scaled64 New = Loop.Scale * F;
515  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
516  << New << "\n");
517  F = New;
518  }
519 }
520 
522  // Set initial frequencies from loop-local masses.
523  for (size_t Index = 0; Index < Working.size(); ++Index)
524  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
525 
526  for (LoopData &Loop : Loops)
527  unwrapLoop(*this, Loop);
528 }
529 
531  // Unwrap loop packages in reverse post-order, tracking min and max
532  // frequencies.
533  auto Min = Scaled64::getLargest();
534  auto Max = Scaled64::getZero();
535  for (size_t Index = 0; Index < Working.size(); ++Index) {
536  // Update min/max scale.
537  Min = std::min(Min, Freqs[Index].Scaled);
538  Max = std::max(Max, Freqs[Index].Scaled);
539  }
540 
541  // Convert to integers.
542  convertFloatingToInteger(*this, Min, Max);
543 
544  // Clean up data structures.
545  cleanup(*this);
546 
547  // Print out the final stats.
548  LLVM_DEBUG(dump());
549 }
550 
553  if (!Node.isValid())
554  return 0;
555  return Freqs[Node.Index].Integer;
556 }
557 
560  const BlockNode &Node) const {
561  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
562 }
563 
566  uint64_t Freq) const {
567  auto EntryCount = F.getEntryCount();
568  if (!EntryCount)
569  return None;
570  // Use 128 bit APInt to do the arithmetic to avoid overflow.
571  APInt BlockCount(128, EntryCount.getCount());
572  APInt BlockFreq(128, Freq);
573  APInt EntryFreq(128, getEntryFreq());
574  BlockCount *= BlockFreq;
575  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
576  // lshr by 1 gives EntryFreq/2.
577  BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
578  return BlockCount.getLimitedValue();
579 }
580 
581 bool
583  if (!Node.isValid())
584  return false;
585  return IsIrrLoopHeader.test(Node.Index);
586 }
587 
588 Scaled64
590  if (!Node.isValid())
591  return Scaled64::getZero();
592  return Freqs[Node.Index].Scaled;
593 }
594 
596  uint64_t Freq) {
597  assert(Node.isValid() && "Expected valid node");
598  assert(Node.Index < Freqs.size() && "Expected legal index");
599  Freqs[Node.Index].Integer = Freq;
600 }
601 
602 std::string
604  return {};
605 }
606 
607 std::string
609  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
610 }
611 
612 raw_ostream &
614  const BlockNode &Node) const {
615  return OS << getFloatingBlockFreq(Node);
616 }
617 
618 raw_ostream &
620  const BlockFrequency &Freq) const {
621  Scaled64 Block(Freq.getFrequency(), 0);
622  Scaled64 Entry(getEntryFreq(), 0);
623 
624  return OS << Block / Entry;
625 }
626 
628  Start = OuterLoop.getHeader();
629  Nodes.reserve(OuterLoop.Nodes.size());
630  for (auto N : OuterLoop.Nodes)
631  addNode(N);
632  indexNodes();
633 }
634 
636  Start = 0;
637  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
638  if (!BFI.Working[Index].isPackaged())
639  addNode(Index);
640  indexNodes();
641 }
642 
644  for (auto &I : Nodes)
645  Lookup[I.Node.Index] = &I;
646 }
647 
649  const BFIBase::LoopData *OuterLoop) {
650  if (OuterLoop && OuterLoop->isHeader(Succ))
651  return;
652  auto L = Lookup.find(Succ.Index);
653  if (L == Lookup.end())
654  return;
655  IrrNode &SuccIrr = *L->second;
656  Irr.Edges.push_back(&SuccIrr);
657  SuccIrr.Edges.push_front(&Irr);
658  ++SuccIrr.NumIn;
659 }
660 
661 namespace llvm {
662 
663 template <> struct GraphTraits<IrreducibleGraph> {
665  using NodeRef = const GraphT::IrrNode *;
666  using ChildIteratorType = GraphT::IrrNode::iterator;
667 
668  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
670  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
671 };
672 
673 } // end namespace llvm
674 
675 /// Find extra irreducible headers.
676 ///
677 /// Find entry blocks and other blocks with backedges, which exist when \c G
678 /// contains irreducible sub-SCCs.
681  const IrreducibleGraph &G,
682  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
683  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
684  // Map from nodes in the SCC to whether it's an entry block.
686 
687  // InSCC also acts the set of nodes in the graph. Seed it.
688  for (const auto *I : SCC)
689  InSCC[I] = false;
690 
691  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
692  auto &Irr = *I->first;
693  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
694  if (InSCC.count(P))
695  continue;
696 
697  // This is an entry block.
698  I->second = true;
699  Headers.push_back(Irr.Node);
700  LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
701  << "\n");
702  break;
703  }
704  }
705  assert(Headers.size() >= 2 &&
706  "Expected irreducible CFG; -loop-info is likely invalid");
707  if (Headers.size() == InSCC.size()) {
708  // Every block is a header.
709  llvm::sort(Headers);
710  return;
711  }
712 
713  // Look for extra headers from irreducible sub-SCCs.
714  for (const auto &I : InSCC) {
715  // Entry blocks are already headers.
716  if (I.second)
717  continue;
718 
719  auto &Irr = *I.first;
720  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
721  // Skip forward edges.
722  if (P->Node < Irr.Node)
723  continue;
724 
725  // Skip predecessors from entry blocks. These can have inverted
726  // ordering.
727  if (InSCC.lookup(P))
728  continue;
729 
730  // Store the extra header.
731  Headers.push_back(Irr.Node);
732  LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
733  << "\n");
734  break;
735  }
736  if (Headers.back() == Irr.Node)
737  // Added this as a header.
738  continue;
739 
740  // This is not a header.
741  Others.push_back(Irr.Node);
742  LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
743  }
744  llvm::sort(Headers);
745  llvm::sort(Others);
746 }
747 
750  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
751  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
752  // Translate the SCC into RPO.
753  LLVM_DEBUG(dbgs() << " - found-scc\n");
754 
755  LoopData::NodeList Headers;
756  LoopData::NodeList Others;
757  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
758 
759  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
760  Headers.end(), Others.begin(), Others.end());
761 
762  // Update loop hierarchy.
763  for (const auto &N : Loop->Nodes)
764  if (BFI.Working[N.Index].isLoopHeader())
765  BFI.Working[N.Index].Loop->Parent = &*Loop;
766  else
767  BFI.Working[N.Index].Loop = &*Loop;
768 }
769 
772  const IrreducibleGraph &G, LoopData *OuterLoop,
773  std::list<LoopData>::iterator Insert) {
774  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
775  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
776 
777  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
778  if (I->size() < 2)
779  continue;
780 
781  // Translate the SCC into RPO.
782  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
783  }
784 
785  if (OuterLoop)
786  return make_range(std::next(Prev), Insert);
787  return make_range(Loops.begin(), Insert);
788 }
789 
790 void
792  OuterLoop.Exits.clear();
793  for (auto &Mass : OuterLoop.BackedgeMass)
794  Mass = BlockMass::getEmpty();
795  auto O = OuterLoop.Nodes.begin() + 1;
796  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
797  if (!Working[I->Index].isPackaged())
798  *O++ = *I;
799  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
800 }
801 
803  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
804 
805  // Since the loop has more than one header block, the mass flowing back into
806  // each header will be different. Adjust the mass in each header loop to
807  // reflect the masses flowing through back edges.
808  //
809  // To do this, we distribute the initial mass using the backedge masses
810  // as weights for the distribution.
811  BlockMass LoopMass = BlockMass::getFull();
812  Distribution Dist;
813 
814  LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
815  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
816  auto &HeaderNode = Loop.Nodes[H];
817  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
818  LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
819  << getBlockName(HeaderNode) << ": " << BackedgeMass
820  << "\n");
821  if (BackedgeMass.getMass() > 0)
822  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
823  else
824  LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
825  }
826 
827  DitheringDistributer D(Dist, LoopMass);
828 
829  LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
830  << " to headers using above weights\n");
831  for (const Weight &W : Dist.Weights) {
832  BlockMass Taken = D.takeMass(W.Amount);
833  assert(W.Type == Weight::Local && "all weights should be local");
834  Working[W.TargetNode.Index].getMass() = Taken;
835  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
836  }
837 }
838 
840  BlockMass LoopMass = BlockMass::getFull();
841  DitheringDistributer D(Dist, LoopMass);
842  for (const Weight &W : Dist.Weights) {
843  BlockMass Taken = D.takeMass(W.Amount);
844  assert(W.Type == Weight::Local && "all weights should be local");
845  Working[W.TargetNode.Index].getMass() = Taken;
846  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
847  }
848 }
const NoneType None
Definition: None.h:23
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool IsPackaged
Whether this has been packaged.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:464
static char getHexDigit(int N)
void push_back(const T &Elt)
Definition: SmallVector.h:211
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
raw_ostream & print(raw_ostream &OS) const
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
F(f)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
Hexagon Hardware Loops
ProfileCount getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1384
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
virtual std::string getBlockName(const BlockNode &Node) const
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
This file implements a class to represent arbitrary precision integral constant values and operations...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:225
#define UINT64_MAX
Definition: DataTypes.h:83
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode *> &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Graph of irreducible control flow.
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
std::string getLoopName(const LoopData &Loop) const
bool isHeader(const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
bool isIrrLoopHeader(const BlockNode &Node)
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode *> &SCC)
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
ScaledNumber inverse() const
Definition: ScaledNumber.h:677
void distributeIrrLoopHeaderMass(Distribution &Dist)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define H(x, y, z)
Definition: MD5.cpp:57
Distribution of unscaled probability weight.
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
IntT toInt() const
Convert to the given integer type.
Definition: ScaledNumber.h:780
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:639
iterator erase(const_iterator CI)
Definition: SmallVector.h:438
size_t size() const
Definition: SmallVector.h:52
void addExit(const BlockNode &Node, uint64_t Amount)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
Definition: APInt.h:69
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:512
static void combineWeightsByHashing(WeightList &Weights)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
void packageLoop(LoopData &Loop)
Package up a loop.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ScaledNumber< uint64_t > Scaled64
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
BlockFrequency getBlockFreq(const BlockNode &Node) const
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.