LLVM  4.0.0
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Loops should be simplified before this analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/IR/Function.h"
18 #include <numeric>
19 
20 using namespace llvm;
21 using namespace llvm::bfi_detail;
22 
23 #define DEBUG_TYPE "block-freq"
24 
26  if (isFull())
27  return ScaledNumber<uint64_t>(1, 0);
28  return ScaledNumber<uint64_t>(getMass() + 1, -64);
29 }
30 
32 
33 static char getHexDigit(int N) {
34  assert(N < 16);
35  if (N < 10)
36  return '0' + N;
37  return 'a' + N - 10;
38 }
39 
41  for (int Digits = 0; Digits < 16; ++Digits)
42  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
43  return OS;
44 }
45 
46 namespace {
47 
49 typedef BlockFrequencyInfoImplBase::Distribution Distribution;
54 typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
55 
56 /// \brief Dithering mass distributer.
57 ///
58 /// This class splits up a single mass into portions by weight, dithering to
59 /// spread out error. No mass is lost. The dithering precision depends on the
60 /// precision of the product of \a BlockMass and \a BranchProbability.
61 ///
62 /// The distribution algorithm follows.
63 ///
64 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
65 /// mass to distribute in \a RemMass.
66 ///
67 /// 2. For each portion:
68 ///
69 /// 1. Construct a branch probability, P, as the portion's weight divided
70 /// by the current value of \a RemWeight.
71 /// 2. Calculate the portion's mass as \a RemMass times P.
72 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
73 /// the current portion's weight and mass.
74 struct DitheringDistributer {
75  uint32_t RemWeight;
76  BlockMass RemMass;
77 
78  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
79 
80  BlockMass takeMass(uint32_t Weight);
81 };
82 
83 } // end anonymous namespace
84 
85 DitheringDistributer::DitheringDistributer(Distribution &Dist,
86  const BlockMass &Mass) {
87  Dist.normalize();
88  RemWeight = Dist.Total;
89  RemMass = Mass;
90 }
91 
92 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
93  assert(Weight && "invalid weight");
94  assert(Weight <= RemWeight);
95  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
96 
97  // Decrement totals (dither).
98  RemWeight -= Weight;
99  RemMass -= Mass;
100  return Mass;
101 }
102 
103 void Distribution::add(const BlockNode &Node, uint64_t Amount,
104  Weight::DistType Type) {
105  assert(Amount && "invalid weight of 0");
106  uint64_t NewTotal = Total + Amount;
107 
108  // Check for overflow. It should be impossible to overflow twice.
109  bool IsOverflow = NewTotal < Total;
110  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
111  DidOverflow |= IsOverflow;
112 
113  // Update the total.
114  Total = NewTotal;
115 
116  // Save the weight.
117  Weights.push_back(Weight(Type, Node, Amount));
118 }
119 
120 static void combineWeight(Weight &W, const Weight &OtherW) {
121  assert(OtherW.TargetNode.isValid());
122  if (!W.Amount) {
123  W = OtherW;
124  return;
125  }
126  assert(W.Type == OtherW.Type);
127  assert(W.TargetNode == OtherW.TargetNode);
128  assert(OtherW.Amount && "Expected non-zero weight");
129  if (W.Amount > W.Amount + OtherW.Amount)
130  // Saturate on overflow.
131  W.Amount = UINT64_MAX;
132  else
133  W.Amount += OtherW.Amount;
134 }
135 
136 static void combineWeightsBySorting(WeightList &Weights) {
137  // Sort so edges to the same node are adjacent.
138  std::sort(Weights.begin(), Weights.end(),
139  [](const Weight &L,
140  const Weight &R) { return L.TargetNode < R.TargetNode; });
141 
142  // Combine adjacent edges.
143  WeightList::iterator O = Weights.begin();
144  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
145  ++O, (I = L)) {
146  *O = *I;
147 
148  // Find the adjacent weights to the same node.
149  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
150  combineWeight(*O, *L);
151  }
152 
153  // Erase extra entries.
154  Weights.erase(O, Weights.end());
155 }
156 
157 static void combineWeightsByHashing(WeightList &Weights) {
158  // Collect weights into a DenseMap.
159  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
160  HashTable Combined(NextPowerOf2(2 * Weights.size()));
161  for (const Weight &W : Weights)
162  combineWeight(Combined[W.TargetNode.Index], W);
163 
164  // Check whether anything changed.
165  if (Weights.size() == Combined.size())
166  return;
167 
168  // Fill in the new weights.
169  Weights.clear();
170  Weights.reserve(Combined.size());
171  for (const auto &I : Combined)
172  Weights.push_back(I.second);
173 }
174 
175 static void combineWeights(WeightList &Weights) {
176  // Use a hash table for many successors to keep this linear.
177  if (Weights.size() > 128) {
178  combineWeightsByHashing(Weights);
179  return;
180  }
181 
182  combineWeightsBySorting(Weights);
183 }
184 
185 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
186  assert(Shift >= 0);
187  assert(Shift < 64);
188  if (!Shift)
189  return N;
190  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
191 }
192 
193 void Distribution::normalize() {
194  // Early exit for termination nodes.
195  if (Weights.empty())
196  return;
197 
198  // Only bother if there are multiple successors.
199  if (Weights.size() > 1)
200  combineWeights(Weights);
201 
202  // Early exit when combined into a single successor.
203  if (Weights.size() == 1) {
204  Total = 1;
205  Weights.front().Amount = 1;
206  return;
207  }
208 
209  // Determine how much to shift right so that the total fits into 32-bits.
210  //
211  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
212  // for each weight can cause a 32-bit overflow.
213  int Shift = 0;
214  if (DidOverflow)
215  Shift = 33;
216  else if (Total > UINT32_MAX)
217  Shift = 33 - countLeadingZeros(Total);
218 
219  // Early exit if nothing needs to be scaled.
220  if (!Shift) {
221  // If we didn't overflow then combineWeights() shouldn't have changed the
222  // sum of the weights, but let's double-check.
223  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
224  [](uint64_t Sum, const Weight &W) {
225  return Sum + W.Amount;
226  }) &&
227  "Expected total to be correct");
228  return;
229  }
230 
231  // Recompute the total through accumulation (rather than shifting it) so that
232  // it's accurate after shifting and any changes combineWeights() made above.
233  Total = 0;
234 
235  // Sum the weights to each node and shift right if necessary.
236  for (Weight &W : Weights) {
237  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
238  // can round here without concern about overflow.
239  assert(W.TargetNode.isValid());
240  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
241  assert(W.Amount <= UINT32_MAX);
242 
243  // Update the total.
244  Total += W.Amount;
245  }
246  assert(Total <= UINT32_MAX);
247 }
248 
250  // Swap with a default-constructed std::vector, since std::vector<>::clear()
251  // does not actually clear heap storage.
252  std::vector<FrequencyData>().swap(Freqs);
253  std::vector<WorkingData>().swap(Working);
254  Loops.clear();
255 }
256 
257 /// \brief Clear all memory not needed downstream.
258 ///
259 /// Releases all memory not used downstream. In particular, saves Freqs.
261  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
262  BFI.clear();
263  BFI.Freqs = std::move(SavedFreqs);
264 }
265 
267  const LoopData *OuterLoop,
268  const BlockNode &Pred,
269  const BlockNode &Succ,
270  uint64_t Weight) {
271  if (!Weight)
272  Weight = 1;
273 
274  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
275  return OuterLoop && OuterLoop->isHeader(Node);
276  };
277 
278  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
279 
280 #ifndef NDEBUG
281  auto debugSuccessor = [&](const char *Type) {
282  dbgs() << " =>"
283  << " [" << Type << "] weight = " << Weight;
284  if (!isLoopHeader(Resolved))
285  dbgs() << ", succ = " << getBlockName(Succ);
286  if (Resolved != Succ)
287  dbgs() << ", resolved = " << getBlockName(Resolved);
288  dbgs() << "\n";
289  };
290  (void)debugSuccessor;
291 #endif
292 
293  if (isLoopHeader(Resolved)) {
294  DEBUG(debugSuccessor("backedge"));
295  Dist.addBackedge(Resolved, Weight);
296  return true;
297  }
298 
299  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
300  DEBUG(debugSuccessor(" exit "));
301  Dist.addExit(Resolved, Weight);
302  return true;
303  }
304 
305  if (Resolved < Pred) {
306  if (!isLoopHeader(Pred)) {
307  // If OuterLoop is an irreducible loop, we can't actually handle this.
308  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
309  "unhandled irreducible control flow");
310 
311  // Irreducible backedge. Abort.
312  DEBUG(debugSuccessor("abort!!!"));
313  return false;
314  }
315 
316  // If "Pred" is a loop header, then this isn't really a backedge; rather,
317  // OuterLoop must be irreducible. These false backedges can come only from
318  // secondary loop headers.
319  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
320  "unhandled irreducible control flow");
321  }
322 
323  DEBUG(debugSuccessor(" local "));
324  Dist.addLocal(Resolved, Weight);
325  return true;
326 }
327 
329  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
330  // Copy the exit map into Dist.
331  for (const auto &I : Loop.Exits)
332  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
333  I.second.getMass()))
334  // Irreducible backedge.
335  return false;
336 
337  return true;
338 }
339 
340 /// \brief Compute the loop scale for a loop.
342  // Compute loop scale.
343  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
344 
345  // Infinite loops need special handling. If we give the back edge an infinite
346  // mass, they may saturate all the other scales in the function down to 1,
347  // making all the other region temperatures look exactly the same. Choose an
348  // arbitrary scale to avoid these issues.
349  //
350  // FIXME: An alternate way would be to select a symbolic scale which is later
351  // replaced to be the maximum of all computed scales plus 1. This would
352  // appropriately describe the loop as having a large scale, without skewing
353  // the final frequency computation.
354  const Scaled64 InfiniteLoopScale(1, 12);
355 
356  // LoopScale == 1 / ExitMass
357  // ExitMass == HeadMass - BackedgeMass
358  BlockMass TotalBackedgeMass;
359  for (auto &Mass : Loop.BackedgeMass)
360  TotalBackedgeMass += Mass;
361  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
362 
363  // Block scale stores the inverse of the scale. If this is an infinite loop,
364  // its exit mass will be zero. In this case, use an arbitrary scale for the
365  // loop scale.
366  Loop.Scale =
367  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
368 
369  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
370  << " - " << TotalBackedgeMass << ")\n"
371  << " - scale = " << Loop.Scale << "\n");
372 }
373 
374 /// \brief Package up a loop.
376  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
377 
378  // Clear the subloop exits to prevent quadratic memory usage.
379  for (const BlockNode &M : Loop.Nodes) {
380  if (auto *Loop = Working[M.Index].getPackagedLoop())
381  Loop->Exits.clear();
382  DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
383  }
384  Loop.IsPackaged = true;
385 }
386 
387 #ifndef NDEBUG
389  const DitheringDistributer &D, const BlockNode &T,
390  const BlockMass &M, const char *Desc) {
391  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
392  if (Desc)
393  dbgs() << " [" << Desc << "]";
394  if (T.isValid())
395  dbgs() << " to " << BFI.getBlockName(T);
396  dbgs() << "\n";
397 }
398 #endif
399 
401  LoopData *OuterLoop,
402  Distribution &Dist) {
403  BlockMass Mass = Working[Source.Index].getMass();
404  DEBUG(dbgs() << " => mass: " << Mass << "\n");
405 
406  // Distribute mass to successors as laid out in Dist.
407  DitheringDistributer D(Dist, Mass);
408 
409  for (const Weight &W : Dist.Weights) {
410  // Check for a local edge (non-backedge and non-exit).
411  BlockMass Taken = D.takeMass(W.Amount);
412  if (W.Type == Weight::Local) {
413  Working[W.TargetNode.Index].getMass() += Taken;
414  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
415  continue;
416  }
417 
418  // Backedges and exits only make sense if we're processing a loop.
419  assert(OuterLoop && "backedge or exit outside of loop");
420 
421  // Check for a backedge.
422  if (W.Type == Weight::Backedge) {
423  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
424  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
425  continue;
426  }
427 
428  // This must be an exit.
429  assert(W.Type == Weight::Exit);
430  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
431  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
432  }
433 }
434 
436  const Scaled64 &Min, const Scaled64 &Max) {
437  // Scale the Factor to a size that creates integers. Ideally, integers would
438  // be scaled so that Max == UINT64_MAX so that they can be best
439  // differentiated. However, in the presence of large frequency values, small
440  // frequencies are scaled down to 1, making it impossible to differentiate
441  // small, unequal numbers. When the spread between Min and Max frequencies
442  // fits well within MaxBits, we make the scale be at least 8.
443  const unsigned MaxBits = 64;
444  const unsigned SpreadBits = (Max / Min).lg();
445  Scaled64 ScalingFactor;
446  if (SpreadBits <= MaxBits - 3) {
447  // If the values are small enough, make the scaling factor at least 8 to
448  // allow distinguishing small values.
449  ScalingFactor = Min.inverse();
450  ScalingFactor <<= 3;
451  } else {
452  // If the values need more than MaxBits to be represented, saturate small
453  // frequency values down to 1 by using a scaling factor that benefits large
454  // frequency values.
455  ScalingFactor = Scaled64(1, MaxBits) / Max;
456  }
457 
458  // Translate the floats to integers.
459  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
460  << ", factor = " << ScalingFactor << "\n");
461  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
462  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
463  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
464  DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
465  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
466  << ", int = " << BFI.Freqs[Index].Integer << "\n");
467  }
468 }
469 
470 /// \brief Unwrap a loop package.
471 ///
472 /// Visits all the members of a loop, adjusting their BlockData according to
473 /// the loop's pseudo-node.
474 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
475  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
476  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
477  << "\n");
478  Loop.Scale *= Loop.Mass.toScaled();
479  Loop.IsPackaged = false;
480  DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
481 
482  // Propagate the head scale through the loop. Since members are visited in
483  // RPO, the head scale will be updated by the loop scale first, and then the
484  // final head scale will be used for updated the rest of the members.
485  for (const BlockNode &N : Loop.Nodes) {
486  const auto &Working = BFI.Working[N.Index];
487  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
488  : BFI.Freqs[N.Index].Scaled;
489  Scaled64 New = Loop.Scale * F;
490  DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
491  << "\n");
492  F = New;
493  }
494 }
495 
497  // Set initial frequencies from loop-local masses.
498  for (size_t Index = 0; Index < Working.size(); ++Index)
499  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
500 
501  for (LoopData &Loop : Loops)
502  unwrapLoop(*this, Loop);
503 }
504 
506  // Unwrap loop packages in reverse post-order, tracking min and max
507  // frequencies.
508  auto Min = Scaled64::getLargest();
509  auto Max = Scaled64::getZero();
510  for (size_t Index = 0; Index < Working.size(); ++Index) {
511  // Update min/max scale.
512  Min = std::min(Min, Freqs[Index].Scaled);
513  Max = std::max(Max, Freqs[Index].Scaled);
514  }
515 
516  // Convert to integers.
517  convertFloatingToInteger(*this, Min, Max);
518 
519  // Clean up data structures.
520  cleanup(*this);
521 
522  // Print out the final stats.
523  DEBUG(dump());
524 }
525 
528  if (!Node.isValid())
529  return 0;
530  return Freqs[Node.Index].Integer;
531 }
532 
535  const BlockNode &Node) const {
536  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
537 }
538 
541  uint64_t Freq) const {
542  auto EntryCount = F.getEntryCount();
543  if (!EntryCount)
544  return None;
545  // Use 128 bit APInt to do the arithmetic to avoid overflow.
546  APInt BlockCount(128, EntryCount.getValue());
547  APInt BlockFreq(128, Freq);
548  APInt EntryFreq(128, getEntryFreq());
549  BlockCount *= BlockFreq;
550  BlockCount = BlockCount.udiv(EntryFreq);
551  return BlockCount.getLimitedValue();
552 }
553 
554 Scaled64
556  if (!Node.isValid())
557  return Scaled64::getZero();
558  return Freqs[Node.Index].Scaled;
559 }
560 
562  uint64_t Freq) {
563  assert(Node.isValid() && "Expected valid node");
564  assert(Node.Index < Freqs.size() && "Expected legal index");
565  Freqs[Node.Index].Integer = Freq;
566 }
567 
568 std::string
570  return std::string();
571 }
572 
573 std::string
575  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
576 }
577 
578 raw_ostream &
580  const BlockNode &Node) const {
581  return OS << getFloatingBlockFreq(Node);
582 }
583 
584 raw_ostream &
586  const BlockFrequency &Freq) const {
587  Scaled64 Block(Freq.getFrequency(), 0);
588  Scaled64 Entry(getEntryFreq(), 0);
589 
590  return OS << Block / Entry;
591 }
592 
594  Start = OuterLoop.getHeader();
595  Nodes.reserve(OuterLoop.Nodes.size());
596  for (auto N : OuterLoop.Nodes)
597  addNode(N);
598  indexNodes();
599 }
600 
602  Start = 0;
603  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
604  if (!BFI.Working[Index].isPackaged())
605  addNode(Index);
606  indexNodes();
607 }
608 
610  for (auto &I : Nodes)
611  Lookup[I.Node.Index] = &I;
612 }
613 
615  const BFIBase::LoopData *OuterLoop) {
616  if (OuterLoop && OuterLoop->isHeader(Succ))
617  return;
618  auto L = Lookup.find(Succ.Index);
619  if (L == Lookup.end())
620  return;
621  IrrNode &SuccIrr = *L->second;
622  Irr.Edges.push_back(&SuccIrr);
623  SuccIrr.Edges.push_front(&Irr);
624  ++SuccIrr.NumIn;
625 }
626 
627 namespace llvm {
628 template <> struct GraphTraits<IrreducibleGraph> {
630 
631  typedef const GraphT::IrrNode *NodeRef;
632  typedef GraphT::IrrNode::iterator ChildIteratorType;
633 
634  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
636  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
637 };
638 } // end namespace llvm
639 
640 /// \brief Find extra irreducible headers.
641 ///
642 /// Find entry blocks and other blocks with backedges, which exist when \c G
643 /// contains irreducible sub-SCCs.
646  const IrreducibleGraph &G,
647  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
648  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
649  // Map from nodes in the SCC to whether it's an entry block.
651 
652  // InSCC also acts the set of nodes in the graph. Seed it.
653  for (const auto *I : SCC)
654  InSCC[I] = false;
655 
656  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
657  auto &Irr = *I->first;
658  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
659  if (InSCC.count(P))
660  continue;
661 
662  // This is an entry block.
663  I->second = true;
664  Headers.push_back(Irr.Node);
665  DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
666  break;
667  }
668  }
669  assert(Headers.size() >= 2 &&
670  "Expected irreducible CFG; -loop-info is likely invalid");
671  if (Headers.size() == InSCC.size()) {
672  // Every block is a header.
673  std::sort(Headers.begin(), Headers.end());
674  return;
675  }
676 
677  // Look for extra headers from irreducible sub-SCCs.
678  for (const auto &I : InSCC) {
679  // Entry blocks are already headers.
680  if (I.second)
681  continue;
682 
683  auto &Irr = *I.first;
684  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
685  // Skip forward edges.
686  if (P->Node < Irr.Node)
687  continue;
688 
689  // Skip predecessors from entry blocks. These can have inverted
690  // ordering.
691  if (InSCC.lookup(P))
692  continue;
693 
694  // Store the extra header.
695  Headers.push_back(Irr.Node);
696  DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
697  break;
698  }
699  if (Headers.back() == Irr.Node)
700  // Added this as a header.
701  continue;
702 
703  // This is not a header.
704  Others.push_back(Irr.Node);
705  DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
706  }
707  std::sort(Headers.begin(), Headers.end());
708  std::sort(Others.begin(), Others.end());
709 }
710 
713  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
714  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
715  // Translate the SCC into RPO.
716  DEBUG(dbgs() << " - found-scc\n");
717 
718  LoopData::NodeList Headers;
719  LoopData::NodeList Others;
720  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
721 
722  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
723  Headers.end(), Others.begin(), Others.end());
724 
725  // Update loop hierarchy.
726  for (const auto &N : Loop->Nodes)
727  if (BFI.Working[N.Index].isLoopHeader())
728  BFI.Working[N.Index].Loop->Parent = &*Loop;
729  else
730  BFI.Working[N.Index].Loop = &*Loop;
731 }
732 
735  const IrreducibleGraph &G, LoopData *OuterLoop,
736  std::list<LoopData>::iterator Insert) {
737  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
738  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
739 
740  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
741  if (I->size() < 2)
742  continue;
743 
744  // Translate the SCC into RPO.
745  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
746  }
747 
748  if (OuterLoop)
749  return make_range(std::next(Prev), Insert);
750  return make_range(Loops.begin(), Insert);
751 }
752 
753 void
755  OuterLoop.Exits.clear();
756  for (auto &Mass : OuterLoop.BackedgeMass)
757  Mass = BlockMass::getEmpty();
758  auto O = OuterLoop.Nodes.begin() + 1;
759  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
760  if (!Working[I->Index].isPackaged())
761  *O++ = *I;
762  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
763 }
764 
766  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
767 
768  // Since the loop has more than one header block, the mass flowing back into
769  // each header will be different. Adjust the mass in each header loop to
770  // reflect the masses flowing through back edges.
771  //
772  // To do this, we distribute the initial mass using the backedge masses
773  // as weights for the distribution.
774  BlockMass LoopMass = BlockMass::getFull();
775  Distribution Dist;
776 
777  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
778  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
779  auto &HeaderNode = Loop.Nodes[H];
780  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
781  DEBUG(dbgs() << " - Add back edge mass for node "
782  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
783  if (BackedgeMass.getMass() > 0)
784  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
785  else
786  DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
787  }
788 
789  DitheringDistributer D(Dist, LoopMass);
790 
791  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
792  << " to headers using above weights\n");
793  for (const Weight &W : Dist.Weights) {
794  BlockMass Taken = D.takeMass(W.Amount);
795  assert(W.Type == Weight::Local && "all weights should be local");
796  Working[W.TargetNode.Index].getMass() = Taken;
797  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
798  }
799 }
MachineLoop * L
const NoneType None
Definition: None.h:23
void push_back(const T &Elt)
Definition: SmallVector.h:211
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
bool IsPackaged
Whether this has been packaged.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:450
static char getHexDigit(int N)
MachineInstrBuilder MachineInstrBuilder &DefMI const MCInstrDesc & Desc
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:180
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
raw_ostream & print(raw_ostream &OS) const
Hexagon Hardware Loops
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
BlockFrequency getBlockFreq(const BlockNode &Node) const
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
std::vector< NodeAddr< NodeBase * > > NodeList
Definition: RDFGraph.h:610
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:226
#define F(x, y, z)
Definition: MD5.cpp:51
Graph of irreducible control flow.
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Definition: APInt.cpp:239
ScaledNumber inverse() const
Definition: ScaledNumber.h:678
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1287
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
std::string getLoopName(const LoopData &Loop) const
#define H(x, y, z)
Definition: MD5.cpp:53
Distribution of unscaled probability weight.
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
bool isHeader(const BlockNode &Node) const
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
Definition: MathExtras.h:619
iterator erase(const_iterator CI)
Definition: SmallVector.h:431
void addExit(const BlockNode &Node, uint64_t Amount)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
virtual std::string getBlockName(const BlockNode &Node) const
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:122
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
Definition: APInt.h:77
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
static void combineWeightsByHashing(WeightList &Weights)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
void packageLoop(LoopData &Loop)
Package up a loop.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:100
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.