LLVM  3.7.0
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Loops should be simplified before this analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/SCCIterator.h"
17 #include <numeric>
18 
19 using namespace llvm;
20 using namespace llvm::bfi_detail;
21 
22 #define DEBUG_TYPE "block-freq"
23 
25  if (isFull())
26  return ScaledNumber<uint64_t>(1, 0);
27  return ScaledNumber<uint64_t>(getMass() + 1, -64);
28 }
29 
30 void BlockMass::dump() const { print(dbgs()); }
31 
32 static char getHexDigit(int N) {
33  assert(N < 16);
34  if (N < 10)
35  return '0' + N;
36  return 'a' + N - 10;
37 }
39  for (int Digits = 0; Digits < 16; ++Digits)
40  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
41  return OS;
42 }
43 
44 namespace {
45 
47 typedef BlockFrequencyInfoImplBase::Distribution Distribution;
52 typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
53 
54 /// \brief Dithering mass distributer.
55 ///
56 /// This class splits up a single mass into portions by weight, dithering to
57 /// spread out error. No mass is lost. The dithering precision depends on the
58 /// precision of the product of \a BlockMass and \a BranchProbability.
59 ///
60 /// The distribution algorithm follows.
61 ///
62 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
63 /// mass to distribute in \a RemMass.
64 ///
65 /// 2. For each portion:
66 ///
67 /// 1. Construct a branch probability, P, as the portion's weight divided
68 /// by the current value of \a RemWeight.
69 /// 2. Calculate the portion's mass as \a RemMass times P.
70 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
71 /// the current portion's weight and mass.
72 struct DitheringDistributer {
73  uint32_t RemWeight;
74  BlockMass RemMass;
75 
76  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
77 
78  BlockMass takeMass(uint32_t Weight);
79 };
80 
81 } // end namespace
82 
83 DitheringDistributer::DitheringDistributer(Distribution &Dist,
84  const BlockMass &Mass) {
85  Dist.normalize();
86  RemWeight = Dist.Total;
87  RemMass = Mass;
88 }
89 
90 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
91  assert(Weight && "invalid weight");
92  assert(Weight <= RemWeight);
93  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
94 
95  // Decrement totals (dither).
96  RemWeight -= Weight;
97  RemMass -= Mass;
98  return Mass;
99 }
100 
101 void Distribution::add(const BlockNode &Node, uint64_t Amount,
102  Weight::DistType Type) {
103  assert(Amount && "invalid weight of 0");
104  uint64_t NewTotal = Total + Amount;
105 
106  // Check for overflow. It should be impossible to overflow twice.
107  bool IsOverflow = NewTotal < Total;
108  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
109  DidOverflow |= IsOverflow;
110 
111  // Update the total.
112  Total = NewTotal;
113 
114  // Save the weight.
115  Weights.push_back(Weight(Type, Node, Amount));
116 }
117 
118 static void combineWeight(Weight &W, const Weight &OtherW) {
119  assert(OtherW.TargetNode.isValid());
120  if (!W.Amount) {
121  W = OtherW;
122  return;
123  }
124  assert(W.Type == OtherW.Type);
125  assert(W.TargetNode == OtherW.TargetNode);
126  assert(OtherW.Amount && "Expected non-zero weight");
127  if (W.Amount > W.Amount + OtherW.Amount)
128  // Saturate on overflow.
129  W.Amount = UINT64_MAX;
130  else
131  W.Amount += OtherW.Amount;
132 }
133 static void combineWeightsBySorting(WeightList &Weights) {
134  // Sort so edges to the same node are adjacent.
135  std::sort(Weights.begin(), Weights.end(),
136  [](const Weight &L,
137  const Weight &R) { return L.TargetNode < R.TargetNode; });
138 
139  // Combine adjacent edges.
140  WeightList::iterator O = Weights.begin();
141  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
142  ++O, (I = L)) {
143  *O = *I;
144 
145  // Find the adjacent weights to the same node.
146  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
147  combineWeight(*O, *L);
148  }
149 
150  // Erase extra entries.
151  Weights.erase(O, Weights.end());
152  return;
153 }
154 static void combineWeightsByHashing(WeightList &Weights) {
155  // Collect weights into a DenseMap.
156  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
157  HashTable Combined(NextPowerOf2(2 * Weights.size()));
158  for (const Weight &W : Weights)
159  combineWeight(Combined[W.TargetNode.Index], W);
160 
161  // Check whether anything changed.
162  if (Weights.size() == Combined.size())
163  return;
164 
165  // Fill in the new weights.
166  Weights.clear();
167  Weights.reserve(Combined.size());
168  for (const auto &I : Combined)
169  Weights.push_back(I.second);
170 }
171 static void combineWeights(WeightList &Weights) {
172  // Use a hash table for many successors to keep this linear.
173  if (Weights.size() > 128) {
174  combineWeightsByHashing(Weights);
175  return;
176  }
177 
178  combineWeightsBySorting(Weights);
179 }
180 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
181  assert(Shift >= 0);
182  assert(Shift < 64);
183  if (!Shift)
184  return N;
185  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
186 }
187 void Distribution::normalize() {
188  // Early exit for termination nodes.
189  if (Weights.empty())
190  return;
191 
192  // Only bother if there are multiple successors.
193  if (Weights.size() > 1)
194  combineWeights(Weights);
195 
196  // Early exit when combined into a single successor.
197  if (Weights.size() == 1) {
198  Total = 1;
199  Weights.front().Amount = 1;
200  return;
201  }
202 
203  // Determine how much to shift right so that the total fits into 32-bits.
204  //
205  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
206  // for each weight can cause a 32-bit overflow.
207  int Shift = 0;
208  if (DidOverflow)
209  Shift = 33;
210  else if (Total > UINT32_MAX)
211  Shift = 33 - countLeadingZeros(Total);
212 
213  // Early exit if nothing needs to be scaled.
214  if (!Shift) {
215  // If we didn't overflow then combineWeights() shouldn't have changed the
216  // sum of the weights, but let's double-check.
217  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
218  [](uint64_t Sum, const Weight &W) {
219  return Sum + W.Amount;
220  }) &&
221  "Expected total to be correct");
222  return;
223  }
224 
225  // Recompute the total through accumulation (rather than shifting it) so that
226  // it's accurate after shifting and any changes combineWeights() made above.
227  Total = 0;
228 
229  // Sum the weights to each node and shift right if necessary.
230  for (Weight &W : Weights) {
231  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
232  // can round here without concern about overflow.
233  assert(W.TargetNode.isValid());
234  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
235  assert(W.Amount <= UINT32_MAX);
236 
237  // Update the total.
238  Total += W.Amount;
239  }
240  assert(Total <= UINT32_MAX);
241 }
242 
244  // Swap with a default-constructed std::vector, since std::vector<>::clear()
245  // does not actually clear heap storage.
246  std::vector<FrequencyData>().swap(Freqs);
247  std::vector<WorkingData>().swap(Working);
248  Loops.clear();
249 }
250 
251 /// \brief Clear all memory not needed downstream.
252 ///
253 /// Releases all memory not used downstream. In particular, saves Freqs.
255  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
256  BFI.clear();
257  BFI.Freqs = std::move(SavedFreqs);
258 }
259 
261  const LoopData *OuterLoop,
262  const BlockNode &Pred,
263  const BlockNode &Succ,
264  uint64_t Weight) {
265  if (!Weight)
266  Weight = 1;
267 
268  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
269  return OuterLoop && OuterLoop->isHeader(Node);
270  };
271 
272  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
273 
274 #ifndef NDEBUG
275  auto debugSuccessor = [&](const char *Type) {
276  dbgs() << " =>"
277  << " [" << Type << "] weight = " << Weight;
278  if (!isLoopHeader(Resolved))
279  dbgs() << ", succ = " << getBlockName(Succ);
280  if (Resolved != Succ)
281  dbgs() << ", resolved = " << getBlockName(Resolved);
282  dbgs() << "\n";
283  };
284  (void)debugSuccessor;
285 #endif
286 
287  if (isLoopHeader(Resolved)) {
288  DEBUG(debugSuccessor("backedge"));
289  Dist.addBackedge(Resolved, Weight);
290  return true;
291  }
292 
293  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
294  DEBUG(debugSuccessor(" exit "));
295  Dist.addExit(Resolved, Weight);
296  return true;
297  }
298 
299  if (Resolved < Pred) {
300  if (!isLoopHeader(Pred)) {
301  // If OuterLoop is an irreducible loop, we can't actually handle this.
302  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
303  "unhandled irreducible control flow");
304 
305  // Irreducible backedge. Abort.
306  DEBUG(debugSuccessor("abort!!!"));
307  return false;
308  }
309 
310  // If "Pred" is a loop header, then this isn't really a backedge; rather,
311  // OuterLoop must be irreducible. These false backedges can come only from
312  // secondary loop headers.
313  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
314  "unhandled irreducible control flow");
315  }
316 
317  DEBUG(debugSuccessor(" local "));
318  Dist.addLocal(Resolved, Weight);
319  return true;
320 }
321 
323  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
324  // Copy the exit map into Dist.
325  for (const auto &I : Loop.Exits)
326  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
327  I.second.getMass()))
328  // Irreducible backedge.
329  return false;
330 
331  return true;
332 }
333 
334 /// \brief Compute the loop scale for a loop.
336  // Compute loop scale.
337  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
338 
339  // Infinite loops need special handling. If we give the back edge an infinite
340  // mass, they may saturate all the other scales in the function down to 1,
341  // making all the other region temperatures look exactly the same. Choose an
342  // arbitrary scale to avoid these issues.
343  //
344  // FIXME: An alternate way would be to select a symbolic scale which is later
345  // replaced to be the maximum of all computed scales plus 1. This would
346  // appropriately describe the loop as having a large scale, without skewing
347  // the final frequency computation.
348  const Scaled64 InifiniteLoopScale(1, 12);
349 
350  // LoopScale == 1 / ExitMass
351  // ExitMass == HeadMass - BackedgeMass
352  BlockMass TotalBackedgeMass;
353  for (auto &Mass : Loop.BackedgeMass)
354  TotalBackedgeMass += Mass;
355  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
356 
357  // Block scale stores the inverse of the scale. If this is an infinite loop,
358  // its exit mass will be zero. In this case, use an arbitrary scale for the
359  // loop scale.
360  Loop.Scale =
361  ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
362 
363  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
364  << " - " << TotalBackedgeMass << ")\n"
365  << " - scale = " << Loop.Scale << "\n");
366 }
367 
368 /// \brief Package up a loop.
370  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
371 
372  // Clear the subloop exits to prevent quadratic memory usage.
373  for (const BlockNode &M : Loop.Nodes) {
374  if (auto *Loop = Working[M.Index].getPackagedLoop())
375  Loop->Exits.clear();
376  DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
377  }
378  Loop.IsPackaged = true;
379 }
380 
381 #ifndef NDEBUG
383  const DitheringDistributer &D, const BlockNode &T,
384  const BlockMass &M, const char *Desc) {
385  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
386  if (Desc)
387  dbgs() << " [" << Desc << "]";
388  if (T.isValid())
389  dbgs() << " to " << BFI.getBlockName(T);
390  dbgs() << "\n";
391 }
392 #endif
393 
395  LoopData *OuterLoop,
396  Distribution &Dist) {
397  BlockMass Mass = Working[Source.Index].getMass();
398  DEBUG(dbgs() << " => mass: " << Mass << "\n");
399 
400  // Distribute mass to successors as laid out in Dist.
401  DitheringDistributer D(Dist, Mass);
402 
403  for (const Weight &W : Dist.Weights) {
404  // Check for a local edge (non-backedge and non-exit).
405  BlockMass Taken = D.takeMass(W.Amount);
406  if (W.Type == Weight::Local) {
407  Working[W.TargetNode.Index].getMass() += Taken;
408  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
409  continue;
410  }
411 
412  // Backedges and exits only make sense if we're processing a loop.
413  assert(OuterLoop && "backedge or exit outside of loop");
414 
415  // Check for a backedge.
416  if (W.Type == Weight::Backedge) {
417  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
418  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
419  continue;
420  }
421 
422  // This must be an exit.
423  assert(W.Type == Weight::Exit);
424  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
425  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
426  }
427 }
428 
430  const Scaled64 &Min, const Scaled64 &Max) {
431  // Scale the Factor to a size that creates integers. Ideally, integers would
432  // be scaled so that Max == UINT64_MAX so that they can be best
433  // differentiated. However, in the presence of large frequency values, small
434  // frequencies are scaled down to 1, making it impossible to differentiate
435  // small, unequal numbers. When the spread between Min and Max frequencies
436  // fits well within MaxBits, we make the scale be at least 8.
437  const unsigned MaxBits = 64;
438  const unsigned SpreadBits = (Max / Min).lg();
439  Scaled64 ScalingFactor;
440  if (SpreadBits <= MaxBits - 3) {
441  // If the values are small enough, make the scaling factor at least 8 to
442  // allow distinguishing small values.
443  ScalingFactor = Min.inverse();
444  ScalingFactor <<= 3;
445  } else {
446  // If the values need more than MaxBits to be represented, saturate small
447  // frequency values down to 1 by using a scaling factor that benefits large
448  // frequency values.
449  ScalingFactor = Scaled64(1, MaxBits) / Max;
450  }
451 
452  // Translate the floats to integers.
453  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
454  << ", factor = " << ScalingFactor << "\n");
455  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
456  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
457  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
458  DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
459  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
460  << ", int = " << BFI.Freqs[Index].Integer << "\n");
461  }
462 }
463 
464 /// \brief Unwrap a loop package.
465 ///
466 /// Visits all the members of a loop, adjusting their BlockData according to
467 /// the loop's pseudo-node.
468 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
469  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
470  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
471  << "\n");
472  Loop.Scale *= Loop.Mass.toScaled();
473  Loop.IsPackaged = false;
474  DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
475 
476  // Propagate the head scale through the loop. Since members are visited in
477  // RPO, the head scale will be updated by the loop scale first, and then the
478  // final head scale will be used for updated the rest of the members.
479  for (const BlockNode &N : Loop.Nodes) {
480  const auto &Working = BFI.Working[N.Index];
481  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
482  : BFI.Freqs[N.Index].Scaled;
483  Scaled64 New = Loop.Scale * F;
484  DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
485  << "\n");
486  F = New;
487  }
488 }
489 
491  // Set initial frequencies from loop-local masses.
492  for (size_t Index = 0; Index < Working.size(); ++Index)
493  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
494 
495  for (LoopData &Loop : Loops)
496  unwrapLoop(*this, Loop);
497 }
498 
500  // Unwrap loop packages in reverse post-order, tracking min and max
501  // frequencies.
502  auto Min = Scaled64::getLargest();
503  auto Max = Scaled64::getZero();
504  for (size_t Index = 0; Index < Working.size(); ++Index) {
505  // Update min/max scale.
506  Min = std::min(Min, Freqs[Index].Scaled);
507  Max = std::max(Max, Freqs[Index].Scaled);
508  }
509 
510  // Convert to integers.
511  convertFloatingToInteger(*this, Min, Max);
512 
513  // Clean up data structures.
514  cleanup(*this);
515 
516  // Print out the final stats.
517  DEBUG(dump());
518 }
519 
522  if (!Node.isValid())
523  return 0;
524  return Freqs[Node.Index].Integer;
525 }
526 Scaled64
528  if (!Node.isValid())
529  return Scaled64::getZero();
530  return Freqs[Node.Index].Scaled;
531 }
532 
533 std::string
535  return std::string();
536 }
537 std::string
539  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
540 }
541 
542 raw_ostream &
544  const BlockNode &Node) const {
545  return OS << getFloatingBlockFreq(Node);
546 }
547 
548 raw_ostream &
550  const BlockFrequency &Freq) const {
551  Scaled64 Block(Freq.getFrequency(), 0);
552  Scaled64 Entry(getEntryFreq(), 0);
553 
554  return OS << Block / Entry;
555 }
556 
558  Start = OuterLoop.getHeader();
559  Nodes.reserve(OuterLoop.Nodes.size());
560  for (auto N : OuterLoop.Nodes)
561  addNode(N);
562  indexNodes();
563 }
565  Start = 0;
566  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
567  if (!BFI.Working[Index].isPackaged())
568  addNode(Index);
569  indexNodes();
570 }
572  for (auto &I : Nodes)
573  Lookup[I.Node.Index] = &I;
574 }
576  const BFIBase::LoopData *OuterLoop) {
577  if (OuterLoop && OuterLoop->isHeader(Succ))
578  return;
579  auto L = Lookup.find(Succ.Index);
580  if (L == Lookup.end())
581  return;
582  IrrNode &SuccIrr = *L->second;
583  Irr.Edges.push_back(&SuccIrr);
584  SuccIrr.Edges.push_front(&Irr);
585  ++SuccIrr.NumIn;
586 }
587 
588 namespace llvm {
589 template <> struct GraphTraits<IrreducibleGraph> {
591 
592  typedef const GraphT::IrrNode NodeType;
593  typedef GraphT::IrrNode::iterator ChildIteratorType;
594 
595  static const NodeType *getEntryNode(const GraphT &G) {
596  return G.StartIrr;
597  }
599  static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
600 };
601 }
602 
603 /// \brief Find extra irreducible headers.
604 ///
605 /// Find entry blocks and other blocks with backedges, which exist when \c G
606 /// contains irreducible sub-SCCs.
609  const IrreducibleGraph &G,
610  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
611  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
612  // Map from nodes in the SCC to whether it's an entry block.
614 
615  // InSCC also acts the set of nodes in the graph. Seed it.
616  for (const auto *I : SCC)
617  InSCC[I] = false;
618 
619  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
620  auto &Irr = *I->first;
621  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
622  if (InSCC.count(P))
623  continue;
624 
625  // This is an entry block.
626  I->second = true;
627  Headers.push_back(Irr.Node);
628  DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
629  break;
630  }
631  }
632  assert(Headers.size() >= 2 &&
633  "Expected irreducible CFG; -loop-info is likely invalid");
634  if (Headers.size() == InSCC.size()) {
635  // Every block is a header.
636  std::sort(Headers.begin(), Headers.end());
637  return;
638  }
639 
640  // Look for extra headers from irreducible sub-SCCs.
641  for (const auto &I : InSCC) {
642  // Entry blocks are already headers.
643  if (I.second)
644  continue;
645 
646  auto &Irr = *I.first;
647  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
648  // Skip forward edges.
649  if (P->Node < Irr.Node)
650  continue;
651 
652  // Skip predecessors from entry blocks. These can have inverted
653  // ordering.
654  if (InSCC.lookup(P))
655  continue;
656 
657  // Store the extra header.
658  Headers.push_back(Irr.Node);
659  DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
660  break;
661  }
662  if (Headers.back() == Irr.Node)
663  // Added this as a header.
664  continue;
665 
666  // This is not a header.
667  Others.push_back(Irr.Node);
668  DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
669  }
670  std::sort(Headers.begin(), Headers.end());
671  std::sort(Others.begin(), Others.end());
672 }
673 
676  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
677  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
678  // Translate the SCC into RPO.
679  DEBUG(dbgs() << " - found-scc\n");
680 
681  LoopData::NodeList Headers;
682  LoopData::NodeList Others;
683  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
684 
685  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
686  Headers.end(), Others.begin(), Others.end());
687 
688  // Update loop hierarchy.
689  for (const auto &N : Loop->Nodes)
690  if (BFI.Working[N.Index].isLoopHeader())
691  BFI.Working[N.Index].Loop->Parent = &*Loop;
692  else
693  BFI.Working[N.Index].Loop = &*Loop;
694 }
695 
698  const IrreducibleGraph &G, LoopData *OuterLoop,
699  std::list<LoopData>::iterator Insert) {
700  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
701  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
702 
703  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
704  if (I->size() < 2)
705  continue;
706 
707  // Translate the SCC into RPO.
708  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
709  }
710 
711  if (OuterLoop)
712  return make_range(std::next(Prev), Insert);
713  return make_range(Loops.begin(), Insert);
714 }
715 
716 void
718  OuterLoop.Exits.clear();
719  for (auto &Mass : OuterLoop.BackedgeMass)
720  Mass = BlockMass::getEmpty();
721  auto O = OuterLoop.Nodes.begin() + 1;
722  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
723  if (!Working[I->Index].isPackaged())
724  *O++ = *I;
725  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
726 }
727 
729  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
730 
731  // Since the loop has more than one header block, the mass flowing back into
732  // each header will be different. Adjust the mass in each header loop to
733  // reflect the masses flowing through back edges.
734  //
735  // To do this, we distribute the initial mass using the backedge masses
736  // as weights for the distribution.
737  BlockMass LoopMass = BlockMass::getFull();
738  Distribution Dist;
739 
740  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
741  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
742  auto &HeaderNode = Loop.Nodes[H];
743  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
744  DEBUG(dbgs() << " - Add back edge mass for node "
745  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
746  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
747  }
748 
749  DitheringDistributer D(Dist, LoopMass);
750 
751  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
752  << " to headers using above weights\n");
753  for (const Weight &W : Dist.Weights) {
754  BlockMass Taken = D.takeMass(W.Amount);
755  assert(W.Type == Weight::Local && "all weights should be local");
756  Working[W.TargetNode.Index].getMass() = Taken;
757  DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
758  }
759 }
void push_back(const T &Elt)
Definition: SmallVector.h:222
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
bool IsPackaged
Whether this has been packaged.
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: GraphTraits.h:27
static char getHexDigit(int N)
void addLocal(const BlockNode &Node, uint64_t Amount)
F(f)
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static const NodeType * getEntryNode(const GraphT &G)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
raw_ostream & print(raw_ostream &OS) const
Hexagon Hardware Loops
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
BlockFrequency getBlockFreq(const BlockNode &Node) const
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
#define G(x, y, z)
Definition: MD5.cpp:52
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:224
Graph of irreducible control flow.
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Definition: APInt.cpp:238
ScaledNumber inverse() const
Definition: ScaledNumber.h:678
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
std::string getLoopName(const LoopData &Loop) const
#define H(x, y, z)
Definition: MD5.cpp:53
Distribution of unscaled probability weight.
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
bool isHeader(const BlockNode &Node) const
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
Definition: MathExtras.h:582
iterator erase(iterator I)
Definition: SmallVector.h:455
void addExit(const BlockNode &Node, uint64_t Amount)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
virtual std::string getBlockName(const BlockNode &Node) const
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:119
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
static void combineWeightsByHashing(WeightList &Weights)
static ChildIteratorType child_end(NodeType *N)
void packageLoop(LoopData &Loop)
Package up a loop.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
std::vector< WorkingData > Working
Loop data: see initializeLoops().
static ChildIteratorType child_begin(NodeType *N)
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:38
#define DEBUG(X)
Definition: Debug.h:92
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.