LCOV - code coverage report
Current view: top level - lib/Analysis - BlockFrequencyInfoImpl.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 243 258 94.2 %
Date: 2018-07-13 00:08:38 Functions: 34 39 87.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Loops should be simplified before this analysis.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
      15             : #include "llvm/ADT/APInt.h"
      16             : #include "llvm/ADT/DenseMap.h"
      17             : #include "llvm/ADT/GraphTraits.h"
      18             : #include "llvm/ADT/None.h"
      19             : #include "llvm/ADT/SCCIterator.h"
      20             : #include "llvm/Config/llvm-config.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include "llvm/Support/BlockFrequency.h"
      23             : #include "llvm/Support/BranchProbability.h"
      24             : #include "llvm/Support/Compiler.h"
      25             : #include "llvm/Support/Debug.h"
      26             : #include "llvm/Support/ScaledNumber.h"
      27             : #include "llvm/Support/MathExtras.h"
      28             : #include "llvm/Support/raw_ostream.h"
      29             : #include <algorithm>
      30             : #include <cassert>
      31             : #include <cstddef>
      32             : #include <cstdint>
      33             : #include <iterator>
      34             : #include <list>
      35             : #include <numeric>
      36             : #include <utility>
      37             : #include <vector>
      38             : 
      39             : using namespace llvm;
      40             : using namespace llvm::bfi_detail;
      41             : 
      42             : #define DEBUG_TYPE "block-freq"
      43             : 
      44     2831339 : ScaledNumber<uint64_t> BlockMass::toScaled() const {
      45     2831339 :   if (isFull())
      46     1401578 :     return ScaledNumber<uint64_t>(1, 0);
      47     1429761 :   return ScaledNumber<uint64_t>(getMass() + 1, -64);
      48             : }
      49             : 
      50             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      51             : LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
      52             : #endif
      53             : 
      54             : static char getHexDigit(int N) {
      55             :   assert(N < 16);
      56           0 :   if (N < 10)
      57           0 :     return '0' + N;
      58           0 :   return 'a' + N - 10;
      59             : }
      60             : 
      61           0 : raw_ostream &BlockMass::print(raw_ostream &OS) const {
      62           0 :   for (int Digits = 0; Digits < 16; ++Digits)
      63           0 :     OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
      64           0 :   return OS;
      65             : }
      66             : 
      67             : namespace {
      68             : 
      69             : using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
      70             : using Distribution = BlockFrequencyInfoImplBase::Distribution;
      71             : using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
      72             : using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
      73             : using LoopData = BlockFrequencyInfoImplBase::LoopData;
      74             : using Weight = BlockFrequencyInfoImplBase::Weight;
      75             : using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
      76             : 
      77             : /// Dithering mass distributer.
      78             : ///
      79             : /// This class splits up a single mass into portions by weight, dithering to
      80             : /// spread out error.  No mass is lost.  The dithering precision depends on the
      81             : /// precision of the product of \a BlockMass and \a BranchProbability.
      82             : ///
      83             : /// The distribution algorithm follows.
      84             : ///
      85             : ///  1. Initialize by saving the sum of the weights in \a RemWeight and the
      86             : ///     mass to distribute in \a RemMass.
      87             : ///
      88             : ///  2. For each portion:
      89             : ///
      90             : ///      1. Construct a branch probability, P, as the portion's weight divided
      91             : ///         by the current value of \a RemWeight.
      92             : ///      2. Calculate the portion's mass as \a RemMass times P.
      93             : ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
      94             : ///         the current portion's weight and mass.
      95             : struct DitheringDistributer {
      96             :   uint32_t RemWeight;
      97             :   BlockMass RemMass;
      98             : 
      99             :   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
     100             : 
     101             :   BlockMass takeMass(uint32_t Weight);
     102             : };
     103             : 
     104             : } // end anonymous namespace
     105             : 
     106             : DitheringDistributer::DitheringDistributer(Distribution &Dist,
     107     2774895 :                                            const BlockMass &Mass) {
     108     2774895 :   Dist.normalize();
     109     2774894 :   RemWeight = Dist.Total;
     110     2774894 :   RemMass = Mass;
     111             : }
     112             : 
     113     2221771 : BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
     114             :   assert(Weight && "invalid weight");
     115             :   assert(Weight <= RemWeight);
     116     4443542 :   BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
     117             : 
     118             :   // Decrement totals (dither).
     119     2221771 :   RemWeight -= Weight;
     120             :   RemMass -= Mass;
     121     2221771 :   return Mass;
     122             : }
     123             : 
     124     2239461 : void Distribution::add(const BlockNode &Node, uint64_t Amount,
     125             :                        Weight::DistType Type) {
     126             :   assert(Amount && "invalid weight of 0");
     127     2239461 :   uint64_t NewTotal = Total + Amount;
     128             : 
     129             :   // Check for overflow.  It should be impossible to overflow twice.
     130     2239461 :   bool IsOverflow = NewTotal < Total;
     131             :   assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
     132     2239461 :   DidOverflow |= IsOverflow;
     133             : 
     134             :   // Update the total.
     135     2239461 :   Total = NewTotal;
     136             : 
     137             :   // Save the weight.
     138     4478922 :   Weights.push_back(Weight(Type, Node, Amount));
     139     2239461 : }
     140             : 
     141             : static void combineWeight(Weight &W, const Weight &OtherW) {
     142             :   assert(OtherW.TargetNode.isValid());
     143       18479 :   if (!W.Amount) {
     144         817 :     W = OtherW;
     145             :     return;
     146             :   }
     147             :   assert(W.Type == OtherW.Type);
     148             :   assert(W.TargetNode == OtherW.TargetNode);
     149             :   assert(OtherW.Amount && "Expected non-zero weight");
     150       17662 :   if (W.Amount > W.Amount + OtherW.Amount)
     151             :     // Saturate on overflow.
     152           2 :     W.Amount = UINT64_MAX;
     153             :   else
     154       17660 :     W.Amount += OtherW.Amount;
     155             : }
     156             : 
     157      729931 : static void combineWeightsBySorting(WeightList &Weights) {
     158             :   // Sort so edges to the same node are adjacent.
     159             :   llvm::sort(Weights.begin(), Weights.end(),
     160             :              [](const Weight &L,
     161             :                 const Weight &R) { return L.TargetNode < R.TargetNode; });
     162             : 
     163             :   // Combine adjacent edges.
     164             :   WeightList::iterator O = Weights.begin();
     165     3786277 :   for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
     166             :        ++O, (I = L)) {
     167     1528173 :     *O = *I;
     168             : 
     169             :     // Find the adjacent weights to the same node.
     170     2361007 :     for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
     171             :       combineWeight(*O, *L);
     172             :   }
     173             : 
     174             :   // Erase extra entries.
     175             :   Weights.erase(O, Weights.end());
     176      729931 : }
     177             : 
     178           7 : static void combineWeightsByHashing(WeightList &Weights) {
     179             :   // Collect weights into a DenseMap.
     180             :   using HashTable = DenseMap<BlockNode::IndexType, Weight>;
     181             : 
     182          14 :   HashTable Combined(NextPowerOf2(2 * Weights.size()));
     183        2373 :   for (const Weight &W : Weights)
     184        1183 :     combineWeight(Combined[W.TargetNode.Index], W);
     185             : 
     186             :   // Check whether anything changed.
     187           7 :   if (Weights.size() == Combined.size())
     188             :     return;
     189             : 
     190             :   // Fill in the new weights.
     191           4 :   Weights.clear();
     192           4 :   Weights.reserve(Combined.size());
     193         342 :   for (const auto &I : Combined)
     194         334 :     Weights.push_back(I.second);
     195             : }
     196             : 
     197      729938 : static void combineWeights(WeightList &Weights) {
     198             :   // Use a hash table for many successors to keep this linear.
     199      729938 :   if (Weights.size() > 128) {
     200           7 :     combineWeightsByHashing(Weights);
     201           7 :     return;
     202             :   }
     203             : 
     204      729931 :   combineWeightsBySorting(Weights);
     205             : }
     206             : 
     207             : static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
     208             :   assert(Shift >= 0);
     209             :   assert(Shift < 64);
     210             :   if (!Shift)
     211             :     return N;
     212       89258 :   return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
     213             : }
     214             : 
     215     2774897 : void Distribution::normalize() {
     216             :   // Early exit for termination nodes.
     217     2774897 :   if (Weights.empty())
     218             :     return;
     219             : 
     220             :   // Only bother if there are multiple successors.
     221     1422725 :   if (Weights.size() > 1)
     222      729938 :     combineWeights(Weights);
     223             : 
     224             :   // Early exit when combined into a single successor.
     225     1422725 :   if (Weights.size() == 1) {
     226      695027 :     Total = 1;
     227      695027 :     Weights.front().Amount = 1;
     228      695027 :     return;
     229             :   }
     230             : 
     231             :   // Determine how much to shift right so that the total fits into 32-bits.
     232             :   //
     233             :   // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
     234             :   // for each weight can cause a 32-bit overflow.
     235             :   int Shift = 0;
     236      727698 :   if (DidOverflow)
     237             :     Shift = 33;
     238      727698 :   else if (Total > UINT32_MAX)
     239       15777 :     Shift = 33 - countLeadingZeros(Total);
     240             : 
     241             :   // Early exit if nothing needs to be scaled.
     242             :   if (!Shift) {
     243             :     // If we didn't overflow then combineWeights() shouldn't have changed the
     244             :     // sum of the weights, but let's double-check.
     245             :     assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
     246             :                                     [](uint64_t Sum, const Weight &W) {
     247             :                       return Sum + W.Amount;
     248             :                     }) &&
     249             :            "Expected total to be correct");
     250             :     return;
     251             :   }
     252             : 
     253             :   // Recompute the total through accumulation (rather than shifting it) so that
     254             :   // it's accurate after shifting and any changes combineWeights() made above.
     255       15777 :   Total = 0;
     256             : 
     257             :   // Sum the weights to each node and shift right if necessary.
     258      194293 :   for (Weight &W : Weights) {
     259             :     // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
     260             :     // can round here without concern about overflow.
     261             :     assert(W.TargetNode.isValid());
     262      267774 :     W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
     263             :     assert(W.Amount <= UINT32_MAX);
     264             : 
     265             :     // Update the total.
     266       89258 :     Total += W.Amount;
     267             :   }
     268             :   assert(Total <= UINT32_MAX);
     269             : }
     270             : 
     271     2428956 : void BlockFrequencyInfoImplBase::clear() {
     272             :   // Swap with a default-constructed std::vector, since std::vector<>::clear()
     273             :   // does not actually clear heap storage.
     274             :   std::vector<FrequencyData>().swap(Freqs);
     275             :   IsIrrLoopHeader.clear();
     276             :   std::vector<WorkingData>().swap(Working);
     277             :   Loops.clear();
     278     2428956 : }
     279             : 
     280             : /// Clear all memory not needed downstream.
     281             : ///
     282             : /// Releases all memory not used downstream.  In particular, saves Freqs.
     283     1214478 : static void cleanup(BlockFrequencyInfoImplBase &BFI) {
     284     1214478 :   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
     285     1214478 :   SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
     286     1214478 :   BFI.clear();
     287             :   BFI.Freqs = std::move(SavedFreqs);
     288     1214478 :   BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
     289     1214478 : }
     290             : 
     291     2238463 : bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
     292             :                                            const LoopData *OuterLoop,
     293             :                                            const BlockNode &Pred,
     294             :                                            const BlockNode &Succ,
     295             :                                            uint64_t Weight) {
     296     2238463 :   if (!Weight)
     297             :     Weight = 1;
     298             : 
     299             :   auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
     300     2238650 :     return OuterLoop && OuterLoop->isHeader(Node);
     301             :   };
     302             : 
     303     4476926 :   BlockNode Resolved = Working[Succ.Index].getResolvedNode();
     304             : 
     305             : #ifndef NDEBUG
     306             :   auto debugSuccessor = [&](const char *Type) {
     307             :     dbgs() << "  =>"
     308             :            << " [" << Type << "] weight = " << Weight;
     309             :     if (!isLoopHeader(Resolved))
     310             :       dbgs() << ", succ = " << getBlockName(Succ);
     311             :     if (Resolved != Succ)
     312             :       dbgs() << ", resolved = " << getBlockName(Resolved);
     313             :     dbgs() << "\n";
     314             :   };
     315             :   (void)debugSuccessor;
     316             : #endif
     317             : 
     318             :   if (isLoopHeader(Resolved)) {
     319             :     LLVM_DEBUG(debugSuccessor("backedge"));
     320             :     Dist.addBackedge(Resolved, Weight);
     321       63720 :     return true;
     322             :   }
     323             : 
     324     4349486 :   if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
     325             :     LLVM_DEBUG(debugSuccessor("  exit  "));
     326             :     Dist.addExit(Resolved, Weight);
     327      147704 :     return true;
     328             :   }
     329             : 
     330     2027039 :   if (Resolved < Pred) {
     331             :     if (!isLoopHeader(Pred)) {
     332             :       // If OuterLoop is an irreducible loop, we can't actually handle this.
     333             :       assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
     334             :              "unhandled irreducible control flow");
     335             : 
     336             :       // Irreducible backedge.  Abort.
     337             :       LLVM_DEBUG(debugSuccessor("abort!!!"));
     338             :       return false;
     339             :     }
     340             : 
     341             :     // If "Pred" is a loop header, then this isn't really a backedge; rather,
     342             :     // OuterLoop must be irreducible.  These false backedges can come only from
     343             :     // secondary loop headers.
     344             :     assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
     345             :            "unhandled irreducible control flow");
     346             :   }
     347             : 
     348             :   LLVM_DEBUG(debugSuccessor(" local  "));
     349             :   Dist.addLocal(Resolved, Weight);
     350     2026873 :   return true;
     351             : }
     352             : 
     353       60312 : bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
     354             :     const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
     355             :   // Copy the exit map into Dist.
     356      351658 :   for (const auto &I : Loop.Exits)
     357      291384 :     if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
     358             :                    I.second.getMass()))
     359             :       // Irreducible backedge.
     360             :       return false;
     361             : 
     362             :   return true;
     363             : }
     364             : 
     365             : /// Compute the loop scale for a loop.
     366       60222 : void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
     367             :   // Compute loop scale.
     368             :   LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
     369             : 
     370             :   // Infinite loops need special handling. If we give the back edge an infinite
     371             :   // mass, they may saturate all the other scales in the function down to 1,
     372             :   // making all the other region temperatures look exactly the same. Choose an
     373             :   // arbitrary scale to avoid these issues.
     374             :   //
     375             :   // FIXME: An alternate way would be to select a symbolic scale which is later
     376             :   // replaced to be the maximum of all computed scales plus 1. This would
     377             :   // appropriately describe the loop as having a large scale, without skewing
     378             :   // the final frequency computation.
     379             :   const Scaled64 InfiniteLoopScale(1, 12);
     380             : 
     381             :   // LoopScale == 1 / ExitMass
     382             :   // ExitMass == HeadMass - BackedgeMass
     383             :   BlockMass TotalBackedgeMass;
     384      181504 :   for (auto &Mass : Loop.BackedgeMass)
     385             :     TotalBackedgeMass += Mass;
     386       60222 :   BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
     387             : 
     388             :   // Block scale stores the inverse of the scale. If this is an infinite loop,
     389             :   // its exit mass will be zero. In this case, use an arbitrary scale for the
     390             :   // loop scale.
     391       60222 :   Loop.Scale =
     392      120444 :       ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
     393             : 
     394             :   LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
     395             :                     << BlockMass::getFull() << " - " << TotalBackedgeMass
     396             :                     << ")\n"
     397             :                     << " - scale = " << Loop.Scale << "\n");
     398       60222 : }
     399             : 
     400             : /// Package up a loop.
     401       60222 : void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
     402             :   LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
     403             : 
     404             :   // Clear the subloop exits to prevent quadratic memory usage.
     405      604000 :   for (const BlockNode &M : Loop.Nodes) {
     406      543778 :     if (auto *Loop = Working[M.Index].getPackagedLoop())
     407             :       Loop->Exits.clear();
     408             :     LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
     409             :   }
     410       60222 :   Loop.IsPackaged = true;
     411       60222 : }
     412             : 
     413             : #ifndef NDEBUG
     414             : static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
     415             :                         const DitheringDistributer &D, const BlockNode &T,
     416             :                         const BlockMass &M, const char *Desc) {
     417             :   dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
     418             :   if (Desc)
     419             :     dbgs() << " [" << Desc << "]";
     420             :   if (T.isValid())
     421             :     dbgs() << " to " << BFI.getBlockName(T);
     422             :   dbgs() << "\n";
     423             : }
     424             : #endif
     425             : 
     426     2774563 : void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
     427             :                                                 LoopData *OuterLoop,
     428             :                                                 Distribution &Dist) {
     429     5549126 :   BlockMass Mass = Working[Source.Index].getMass();
     430             :   LLVM_DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
     431             : 
     432             :   // Distribute mass to successors as laid out in Dist.
     433             :   DitheringDistributer D(Dist, Mass);
     434             : 
     435     7215788 :   for (const Weight &W : Dist.Weights) {
     436             :     // Check for a local edge (non-backedge and non-exit).
     437     2220613 :     BlockMass Taken = D.takeMass(W.Amount);
     438     4232009 :     if (W.Type == Weight::Local) {
     439     4022792 :       Working[W.TargetNode.Index].getMass() += Taken;
     440             :       LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     441     4086464 :       continue;
     442             :     }
     443             : 
     444             :     // Backedges and exits only make sense if we're processing a loop.
     445             :     assert(OuterLoop && "backedge or exit outside of loop");
     446             : 
     447             :     // Check for a backedge.
     448      272889 :     if (W.Type == Weight::Backedge) {
     449       63672 :       OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
     450             :       LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
     451       63672 :       continue;
     452             :     }
     453             : 
     454             :     // This must be an exit.
     455             :     assert(W.Type == Weight::Exit);
     456      291090 :     OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
     457             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
     458             :   }
     459     2774562 : }
     460             : 
     461     1214478 : static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
     462             :                                      const Scaled64 &Min, const Scaled64 &Max) {
     463             :   // Scale the Factor to a size that creates integers.  Ideally, integers would
     464             :   // be scaled so that Max == UINT64_MAX so that they can be best
     465             :   // differentiated.  However, in the presence of large frequency values, small
     466             :   // frequencies are scaled down to 1, making it impossible to differentiate
     467             :   // small, unequal numbers. When the spread between Min and Max frequencies
     468             :   // fits well within MaxBits, we make the scale be at least 8.
     469             :   const unsigned MaxBits = 64;
     470     1214478 :   const unsigned SpreadBits = (Max / Min).lg();
     471     1214478 :   Scaled64 ScalingFactor;
     472     1214478 :   if (SpreadBits <= MaxBits - 3) {
     473             :     // If the values are small enough, make the scaling factor at least 8 to
     474             :     // allow distinguishing small values.
     475     1213627 :     ScalingFactor = Min.inverse();
     476             :     ScalingFactor <<= 3;
     477             :   } else {
     478             :     // If the values need more than MaxBits to be represented, saturate small
     479             :     // frequency values down to 1 by using a scaling factor that benefits large
     480             :     // frequency values.
     481         851 :     ScalingFactor = Scaled64(1, MaxBits) / Max;
     482             :   }
     483             : 
     484             :   // Translate the floats to integers.
     485             :   LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
     486             :                     << ", factor = " << ScalingFactor << "\n");
     487    10568685 :   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
     488     2713243 :     Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
     489     8139729 :     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
     490             :     LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
     491             :                       << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
     492             :                       << ", int = " << BFI.Freqs[Index].Integer << "\n");
     493             :   }
     494     1214478 : }
     495             : 
     496             : /// Unwrap a loop package.
     497             : ///
     498             : /// Visits all the members of a loop, adjusting their BlockData according to
     499             : /// the loop's pseudo-node.
     500       60222 : static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
     501             :   LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
     502             :                     << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
     503             :                     << "\n");
     504       60222 :   Loop.Scale *= Loop.Mass.toScaled();
     505       60222 :   Loop.IsPackaged = false;
     506             :   LLVM_DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
     507             : 
     508             :   // Propagate the head scale through the loop.  Since members are visited in
     509             :   // RPO, the head scale will be updated by the loop scale first, and then the
     510             :   // final head scale will be used for updated the rest of the members.
     511      604000 :   for (const BlockNode &N : Loop.Nodes) {
     512      271889 :     const auto &Working = BFI.Working[N.Index];
     513      280157 :     Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
     514      543778 :                                        : BFI.Freqs[N.Index].Scaled;
     515             :     Scaled64 New = Loop.Scale * F;
     516             :     LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
     517             :                       << New << "\n");
     518      271889 :     F = New;
     519             :   }
     520       60222 : }
     521             : 
     522     1214478 : void BlockFrequencyInfoImplBase::unwrapLoops() {
     523             :   // Set initial frequencies from loop-local masses.
     524    10568682 :   for (size_t Index = 0; Index < Working.size(); ++Index)
     525     5426484 :     Freqs[Index].Scaled = Working[Index].Mass.toScaled();
     526             : 
     527     2489178 :   for (LoopData &Loop : Loops)
     528       60222 :     unwrapLoop(*this, Loop);
     529     1214478 : }
     530             : 
     531     1214478 : void BlockFrequencyInfoImplBase::finalizeMetrics() {
     532             :   // Unwrap loop packages in reverse post-order, tracking min and max
     533             :   // frequencies.
     534     1214478 :   auto Min = Scaled64::getLargest();
     535     1214478 :   auto Max = Scaled64::getZero();
     536    10568685 :   for (size_t Index = 0; Index < Working.size(); ++Index) {
     537             :     // Update min/max scale.
     538     8139729 :     Min = std::min(Min, Freqs[Index].Scaled);
     539     8139729 :     Max = std::max(Max, Freqs[Index].Scaled);
     540             :   }
     541             : 
     542             :   // Convert to integers.
     543     1214478 :   convertFloatingToInteger(*this, Min, Max);
     544             : 
     545             :   // Clean up data structures.
     546     1214478 :   cleanup(*this);
     547             : 
     548             :   // Print out the final stats.
     549             :   LLVM_DEBUG(dump());
     550     1214478 : }
     551             : 
     552             : BlockFrequency
     553     6837118 : BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
     554     6837118 :   if (!Node.isValid())
     555       17729 :     return 0;
     556    13638778 :   return Freqs[Node.Index].Integer;
     557             : }
     558             : 
     559             : Optional<uint64_t>
     560        1330 : BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
     561             :                                                  const BlockNode &Node) const {
     562        2660 :   return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
     563             : }
     564             : 
     565             : Optional<uint64_t>
     566        1421 : BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
     567             :                                                     uint64_t Freq) const {
     568        1421 :   auto EntryCount = F.getEntryCount();
     569        1421 :   if (!EntryCount)
     570             :     return None;
     571             :   // Use 128 bit APInt to do the arithmetic to avoid overflow.
     572         494 :   APInt BlockCount(128, EntryCount.getCount());
     573             :   APInt BlockFreq(128, Freq);
     574             :   APInt EntryFreq(128, getEntryFreq());
     575         494 :   BlockCount *= BlockFreq;
     576         988 :   BlockCount = BlockCount.udiv(EntryFreq);
     577             :   return BlockCount.getLimitedValue();
     578             : }
     579             : 
     580             : bool
     581         265 : BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
     582         265 :   if (!Node.isValid())
     583             :     return false;
     584         259 :   return IsIrrLoopHeader.test(Node.Index);
     585             : }
     586             : 
     587             : Scaled64
     588         532 : BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
     589         532 :   if (!Node.isValid())
     590             :     return Scaled64::getZero();
     591        1064 :   return Freqs[Node.Index].Scaled;
     592             : }
     593             : 
     594        2117 : void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
     595             :                                               uint64_t Freq) {
     596             :   assert(Node.isValid() && "Expected valid node");
     597             :   assert(Node.Index < Freqs.size() && "Expected legal index");
     598        4234 :   Freqs[Node.Index].Integer = Freq;
     599        2117 : }
     600             : 
     601             : std::string
     602           0 : BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
     603           0 :   return {};
     604             : }
     605             : 
     606             : std::string
     607           0 : BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
     608           0 :   return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
     609             : }
     610             : 
     611             : raw_ostream &
     612           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     613             :                                            const BlockNode &Node) const {
     614           0 :   return OS << getFloatingBlockFreq(Node);
     615             : }
     616             : 
     617             : raw_ostream &
     618           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     619             :                                            const BlockFrequency &Freq) const {
     620             :   Scaled64 Block(Freq.getFrequency(), 0);
     621             :   Scaled64 Entry(getEntryFreq(), 0);
     622             : 
     623           0 :   return OS << Block / Entry;
     624             : }
     625             : 
     626          45 : void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
     627          45 :   Start = OuterLoop.getHeader();
     628          90 :   Nodes.reserve(OuterLoop.Nodes.size());
     629        1295 :   for (auto N : OuterLoop.Nodes)
     630         625 :     addNode(N);
     631          45 :   indexNodes();
     632          45 : }
     633             : 
     634         121 : void IrreducibleGraph::addNodesInFunction() {
     635         121 :   Start = 0;
     636        8063 :   for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
     637        2607 :     if (!BFI.Working[Index].isPackaged())
     638        2438 :       addNode(Index);
     639         121 :   indexNodes();
     640         121 : }
     641             : 
     642         166 : void IrreducibleGraph::indexNodes() {
     643        3229 :   for (auto &I : Nodes)
     644        6126 :     Lookup[I.Node.Index] = &I;
     645         166 : }
     646             : 
     647        5792 : void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
     648             :                                const BFIBase::LoopData *OuterLoop) {
     649        5792 :   if (OuterLoop && OuterLoop->isHeader(Succ))
     650         206 :     return;
     651        5717 :   auto L = Lookup.find(Succ.Index);
     652        5717 :   if (L == Lookup.end())
     653             :     return;
     654        5586 :   IrrNode &SuccIrr = *L->second;
     655       11172 :   Irr.Edges.push_back(&SuccIrr);
     656       11172 :   SuccIrr.Edges.push_front(&Irr);
     657        5586 :   ++SuccIrr.NumIn;
     658             : }
     659             : 
     660             : namespace llvm {
     661             : 
     662             : template <> struct GraphTraits<IrreducibleGraph> {
     663             :   using GraphT = bfi_detail::IrreducibleGraph;
     664             :   using NodeRef = const GraphT::IrrNode *;
     665             :   using ChildIteratorType = GraphT::IrrNode::iterator;
     666             : 
     667             :   static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
     668        3063 :   static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
     669             :   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
     670             : };
     671             : 
     672             : } // end namespace llvm
     673             : 
     674             : /// Find extra irreducible headers.
     675             : ///
     676             : /// Find entry blocks and other blocks with backedges, which exist when \c G
     677             : /// contains irreducible sub-SCCs.
     678         169 : static void findIrreducibleHeaders(
     679             :     const BlockFrequencyInfoImplBase &BFI,
     680             :     const IrreducibleGraph &G,
     681             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
     682             :     LoopData::NodeList &Headers, LoopData::NodeList &Others) {
     683             :   // Map from nodes in the SCC to whether it's an entry block.
     684             :   SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
     685             : 
     686             :   // InSCC also acts the set of nodes in the graph.  Seed it.
     687         169 :   for (const auto *I : SCC)
     688        1927 :     InSCC[I] = false;
     689             : 
     690        2265 :   for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
     691        1927 :     auto &Irr = *I->first;
     692        5827 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     693        3900 :       if (InSCC.count(P))
     694             :         continue;
     695             : 
     696             :       // This is an entry block.
     697         462 :       I->second = true;
     698         462 :       Headers.push_back(Irr.Node);
     699             :       LLVM_DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node)
     700             :                         << "\n");
     701             :       break;
     702             :     }
     703             :   }
     704             :   assert(Headers.size() >= 2 &&
     705             :          "Expected irreducible CFG; -loop-info is likely invalid");
     706         169 :   if (Headers.size() == InSCC.size()) {
     707             :     // Every block is a header.
     708             :     llvm::sort(Headers.begin(), Headers.end());
     709             :     return;
     710             :   }
     711             : 
     712             :   // Look for extra headers from irreducible sub-SCCs.
     713        1920 :   for (const auto &I : InSCC) {
     714             :     // Entry blocks are already headers.
     715        1714 :     if (I.second)
     716             :       continue;
     717             : 
     718        1465 :     auto &Irr = *I.first;
     719        3094 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     720             :       // Skip forward edges.
     721        1755 :       if (P->Node < Irr.Node)
     722             :         continue;
     723             : 
     724             :       // Skip predecessors from entry blocks.  These can have inverted
     725             :       // ordering.
     726         147 :       if (InSCC.lookup(P))
     727             :         continue;
     728             : 
     729             :       // Store the extra header.
     730         126 :       Headers.push_back(Irr.Node);
     731             :       LLVM_DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node)
     732             :                         << "\n");
     733             :       break;
     734             :     }
     735        2930 :     if (Headers.back() == Irr.Node)
     736             :       // Added this as a header.
     737             :       continue;
     738             : 
     739             :     // This is not a header.
     740        1339 :     Others.push_back(Irr.Node);
     741             :     LLVM_DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
     742             :   }
     743             :   llvm::sort(Headers.begin(), Headers.end());
     744             :   llvm::sort(Others.begin(), Others.end());
     745             : }
     746             : 
     747         169 : static void createIrreducibleLoop(
     748             :     BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
     749             :     LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
     750             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
     751             :   // Translate the SCC into RPO.
     752             :   LLVM_DEBUG(dbgs() << " - found-scc\n");
     753             : 
     754             :   LoopData::NodeList Headers;
     755             :   LoopData::NodeList Others;
     756         169 :   findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
     757             : 
     758         338 :   auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
     759         676 :                                 Headers.end(), Others.begin(), Others.end());
     760             : 
     761             :   // Update loop hierarchy.
     762        4023 :   for (const auto &N : Loop->Nodes)
     763        1927 :     if (BFI.Working[N.Index].isLoopHeader())
     764         146 :       BFI.Working[N.Index].Loop->Parent = &*Loop;
     765             :     else
     766        1781 :       BFI.Working[N.Index].Loop = &*Loop;
     767         169 : }
     768             : 
     769             : iterator_range<std::list<LoopData>::iterator>
     770         166 : BlockFrequencyInfoImplBase::analyzeIrreducible(
     771             :     const IrreducibleGraph &G, LoopData *OuterLoop,
     772             :     std::list<LoopData>::iterator Insert) {
     773             :   assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
     774         166 :   auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
     775             : 
     776        1637 :   for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
     777        1305 :     if (I->size() < 2)
     778        1136 :       continue;
     779             : 
     780             :     // Translate the SCC into RPO.
     781         169 :     createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
     782             :   }
     783             : 
     784         166 :   if (OuterLoop)
     785             :     return make_range(std::next(Prev), Insert);
     786         121 :   return make_range(Loops.begin(), Insert);
     787             : }
     788             : 
     789             : void
     790          45 : BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
     791             :   OuterLoop.Exits.clear();
     792         135 :   for (auto &Mass : OuterLoop.BackedgeMass)
     793          45 :     Mass = BlockMass::getEmpty();
     794          45 :   auto O = OuterLoop.Nodes.begin() + 1;
     795        1205 :   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
     796        1160 :     if (!Working[I->Index].isPackaged())
     797         252 :       *O++ = *I;
     798             :   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
     799          45 : }
     800             : 
     801         163 : void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
     802             :   assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
     803             : 
     804             :   // Since the loop has more than one header block, the mass flowing back into
     805             :   // each header will be different. Adjust the mass in each header loop to
     806             :   // reflect the masses flowing through back edges.
     807             :   //
     808             :   // To do this, we distribute the initial mass using the backedge masses
     809             :   // as weights for the distribution.
     810             :   BlockMass LoopMass = BlockMass::getFull();
     811             :   Distribution Dist;
     812             : 
     813             :   LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
     814        1303 :   for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
     815         570 :     auto &HeaderNode = Loop.Nodes[H];
     816         570 :     auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
     817             :     LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
     818             :                       << getBlockName(HeaderNode) << ": " << BackedgeMass
     819             :                       << "\n");
     820         570 :     if (BackedgeMass.getMass() > 0)
     821             :       Dist.addLocal(HeaderNode, BackedgeMass.getMass());
     822             :     else
     823             :       LLVM_DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
     824             :   }
     825             : 
     826             :   DitheringDistributer D(Dist, LoopMass);
     827             : 
     828             :   LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
     829             :                     << " to headers using above weights\n");
     830        1303 :   for (const Weight &W : Dist.Weights) {
     831         570 :     BlockMass Taken = D.takeMass(W.Amount);
     832             :     assert(W.Type == Weight::Local && "all weights should be local");
     833        1140 :     Working[W.TargetNode.Index].getMass() = Taken;
     834             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     835             :   }
     836         163 : }
     837             : 
     838         169 : void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
     839             :   BlockMass LoopMass = BlockMass::getFull();
     840             :   DitheringDistributer D(Dist, LoopMass);
     841        1345 :   for (const Weight &W : Dist.Weights) {
     842         588 :     BlockMass Taken = D.takeMass(W.Amount);
     843             :     assert(W.Type == Weight::Local && "all weights should be local");
     844        1176 :     Working[W.TargetNode.Index].getMass() = Taken;
     845             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     846             :   }
     847         169 : }

Generated by: LCOV version 1.13