LCOV - code coverage report
Current view: top level - lib/Analysis - BlockFrequencyInfoImpl.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 277 294 94.2 %
Date: 2017-09-14 15:23:50 Functions: 32 37 86.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Loops should be simplified before this analysis.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
      15             : #include "llvm/ADT/APInt.h"
      16             : #include "llvm/ADT/DenseMap.h"
      17             : #include "llvm/ADT/GraphTraits.h"
      18             : #include "llvm/ADT/None.h"
      19             : #include "llvm/ADT/SCCIterator.h"
      20             : #include "llvm/IR/Function.h"
      21             : #include "llvm/Support/BlockFrequency.h"
      22             : #include "llvm/Support/BranchProbability.h"
      23             : #include "llvm/Support/Compiler.h"
      24             : #include "llvm/Support/Debug.h"
      25             : #include "llvm/Support/ScaledNumber.h"
      26             : #include "llvm/Support/MathExtras.h"
      27             : #include "llvm/Support/raw_ostream.h"
      28             : #include <algorithm>
      29             : #include <cassert>
      30             : #include <cstddef>
      31             : #include <cstdint>
      32             : #include <iterator>
      33             : #include <list>
      34             : #include <numeric>
      35             : #include <utility>
      36             : #include <vector>
      37             : 
      38             : using namespace llvm;
      39             : using namespace llvm::bfi_detail;
      40             : 
      41             : #define DEBUG_TYPE "block-freq"
      42             : 
      43     2083484 : ScaledNumber<uint64_t> BlockMass::toScaled() const {
      44     2083484 :   if (isFull())
      45      921385 :     return ScaledNumber<uint64_t>(1, 0);
      46     1162099 :   return ScaledNumber<uint64_t>(getMass() + 1, -64);
      47             : }
      48             : 
      49             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      50             : LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
      51             : #endif
      52             : 
      53             : static char getHexDigit(int N) {
      54             :   assert(N < 16);
      55           0 :   if (N < 10)
      56           0 :     return '0' + N;
      57           0 :   return 'a' + N - 10;
      58             : }
      59             : 
      60           0 : raw_ostream &BlockMass::print(raw_ostream &OS) const {
      61           0 :   for (int Digits = 0; Digits < 16; ++Digits)
      62           0 :     OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
      63           0 :   return OS;
      64             : }
      65             : 
      66             : namespace {
      67             : 
      68             : using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
      69             : using Distribution = BlockFrequencyInfoImplBase::Distribution;
      70             : using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
      71             : using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
      72             : using LoopData = BlockFrequencyInfoImplBase::LoopData;
      73             : using Weight = BlockFrequencyInfoImplBase::Weight;
      74             : using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
      75             : 
      76             : /// \brief Dithering mass distributer.
      77             : ///
      78             : /// This class splits up a single mass into portions by weight, dithering to
      79             : /// spread out error.  No mass is lost.  The dithering precision depends on the
      80             : /// precision of the product of \a BlockMass and \a BranchProbability.
      81             : ///
      82             : /// The distribution algorithm follows.
      83             : ///
      84             : ///  1. Initialize by saving the sum of the weights in \a RemWeight and the
      85             : ///     mass to distribute in \a RemMass.
      86             : ///
      87             : ///  2. For each portion:
      88             : ///
      89             : ///      1. Construct a branch probability, P, as the portion's weight divided
      90             : ///         by the current value of \a RemWeight.
      91             : ///      2. Calculate the portion's mass as \a RemMass times P.
      92             : ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
      93             : ///         the current portion's weight and mass.
      94             : struct DitheringDistributer {
      95             :   uint32_t RemWeight;
      96             :   BlockMass RemMass;
      97             : 
      98             :   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
      99             : 
     100             :   BlockMass takeMass(uint32_t Weight);
     101             : };
     102             : 
     103             : } // end anonymous namespace
     104             : 
     105             : DitheringDistributer::DitheringDistributer(Distribution &Dist,
     106     4078490 :                                            const BlockMass &Mass) {
     107     2039245 :   Dist.normalize();
     108     2039245 :   RemWeight = Dist.Total;
     109     2039245 :   RemMass = Mass;
     110             : }
     111             : 
     112     1798252 : BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
     113             :   assert(Weight && "invalid weight");
     114             :   assert(Weight <= RemWeight);
     115     3596504 :   BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
     116             : 
     117             :   // Decrement totals (dither).
     118     1798252 :   RemWeight -= Weight;
     119     3596504 :   RemMass -= Mass;
     120     1798252 :   return Mass;
     121             : }
     122             : 
     123     1811577 : void Distribution::add(const BlockNode &Node, uint64_t Amount,
     124             :                        Weight::DistType Type) {
     125             :   assert(Amount && "invalid weight of 0");
     126     1811577 :   uint64_t NewTotal = Total + Amount;
     127             : 
     128             :   // Check for overflow.  It should be impossible to overflow twice.
     129     1811577 :   bool IsOverflow = NewTotal < Total;
     130             :   assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
     131     1811577 :   DidOverflow |= IsOverflow;
     132             : 
     133             :   // Update the total.
     134     1811577 :   Total = NewTotal;
     135             : 
     136             :   // Save the weight.
     137     3623154 :   Weights.push_back(Weight(Type, Node, Amount));
     138     1811577 : }
     139             : 
     140             : static void combineWeight(Weight &W, const Weight &OtherW) {
     141             :   assert(OtherW.TargetNode.isValid());
     142       13951 :   if (!W.Amount) {
     143         650 :     W = OtherW;
     144             :     return;
     145             :   }
     146             :   assert(W.Type == OtherW.Type);
     147             :   assert(W.TargetNode == OtherW.TargetNode);
     148             :   assert(OtherW.Amount && "Expected non-zero weight");
     149       13301 :   if (W.Amount > W.Amount + OtherW.Amount)
     150             :     // Saturate on overflow.
     151           2 :     W.Amount = UINT64_MAX;
     152             :   else
     153       13299 :     W.Amount += OtherW.Amount;
     154             : }
     155             : 
     156      591745 : static void combineWeightsBySorting(WeightList &Weights) {
     157             :   // Sort so edges to the same node are adjacent.
     158     2366980 :   std::sort(Weights.begin(), Weights.end(),
     159             :             [](const Weight &L,
     160     1989500 :                const Weight &R) { return L.TargetNode < R.TargetNode; });
     161             : 
     162             :   // Combine adjacent edges.
     163     1183490 :   WeightList::iterator O = Weights.begin();
     164     3658182 :   for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
     165     1237346 :        ++O, (I = L)) {
     166     1237346 :     *O = *I;
     167             : 
     168             :     // Find the adjacent weights to the same node.
     169     1909183 :     for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
     170       13118 :       combineWeight(*O, *L);
     171             :   }
     172             : 
     173             :   // Erase extra entries.
     174     1183490 :   Weights.erase(O, Weights.end());
     175      591745 : }
     176             : 
     177           5 : static void combineWeightsByHashing(WeightList &Weights) {
     178             :   // Collect weights into a DenseMap.
     179             :   using HashTable = DenseMap<BlockNode::IndexType, Weight>;
     180             : 
     181          22 :   HashTable Combined(NextPowerOf2(2 * Weights.size()));
     182         848 :   for (const Weight &W : Weights)
     183        2499 :     combineWeight(Combined[W.TargetNode.Index], W);
     184             : 
     185             :   // Check whether anything changed.
     186          15 :   if (Weights.size() == Combined.size())
     187           3 :     return;
     188             : 
     189             :   // Fill in the new weights.
     190           4 :   Weights.clear();
     191           4 :   Weights.reserve(Combined.size());
     192         173 :   for (const auto &I : Combined)
     193         167 :     Weights.push_back(I.second);
     194             : }
     195             : 
     196      591750 : static void combineWeights(WeightList &Weights) {
     197             :   // Use a hash table for many successors to keep this linear.
     198     1183500 :   if (Weights.size() > 128) {
     199           5 :     combineWeightsByHashing(Weights);
     200           5 :     return;
     201             :   }
     202             : 
     203      591745 :   combineWeightsBySorting(Weights);
     204             : }
     205             : 
     206             : static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
     207             :   assert(Shift >= 0);
     208             :   assert(Shift < 64);
     209             :   if (!Shift)
     210             :     return N;
     211       70708 :   return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
     212             : }
     213             : 
     214     2039248 : void Distribution::normalize() {
     215             :   // Early exit for termination nodes.
     216     2039248 :   if (Weights.empty())
     217             :     return;
     218             : 
     219             :   // Only bother if there are multiple successors.
     220     2304024 :   if (Weights.size() > 1)
     221      591750 :     combineWeights(Weights);
     222             : 
     223             :   // Early exit when combined into a single successor.
     224     2304024 :   if (Weights.size() == 1) {
     225      562281 :     Total = 1;
     226     1124562 :     Weights.front().Amount = 1;
     227      562281 :     return;
     228             :   }
     229             : 
     230             :   // Determine how much to shift right so that the total fits into 32-bits.
     231             :   //
     232             :   // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
     233             :   // for each weight can cause a 32-bit overflow.
     234      589731 :   int Shift = 0;
     235      589731 :   if (DidOverflow)
     236             :     Shift = 33;
     237      589731 :   else if (Total > UINT32_MAX)
     238       24674 :     Shift = 33 - countLeadingZeros(Total);
     239             : 
     240             :   // Early exit if nothing needs to be scaled.
     241             :   if (!Shift) {
     242             :     // If we didn't overflow then combineWeights() shouldn't have changed the
     243             :     // sum of the weights, but let's double-check.
     244             :     assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
     245             :                                     [](uint64_t Sum, const Weight &W) {
     246             :                       return Sum + W.Amount;
     247             :                     }) &&
     248             :            "Expected total to be correct");
     249             :     return;
     250             :   }
     251             : 
     252             :   // Recompute the total through accumulation (rather than shifting it) so that
     253             :   // it's accurate after shifting and any changes combineWeights() made above.
     254       12337 :   Total = 0;
     255             : 
     256             :   // Sum the weights to each node and shift right if necessary.
     257       83045 :   for (Weight &W : Weights) {
     258             :     // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
     259             :     // can round here without concern about overflow.
     260             :     assert(W.TargetNode.isValid());
     261      212124 :     W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
     262             :     assert(W.Amount <= UINT32_MAX);
     263             : 
     264             :     // Update the total.
     265       70708 :     Total += W.Amount;
     266             :   }
     267             :   assert(Total <= UINT32_MAX);
     268             : }
     269             : 
     270     1550732 : void BlockFrequencyInfoImplBase::clear() {
     271             :   // Swap with a default-constructed std::vector, since std::vector<>::clear()
     272             :   // does not actually clear heap storage.
     273     4652196 :   std::vector<FrequencyData>().swap(Freqs);
     274     4652196 :   std::vector<WorkingData>().swap(Working);
     275     3101464 :   Loops.clear();
     276     1550732 : }
     277             : 
     278             : /// \brief Clear all memory not needed downstream.
     279             : ///
     280             : /// Releases all memory not used downstream.  In particular, saves Freqs.
     281      775366 : static void cleanup(BlockFrequencyInfoImplBase &BFI) {
     282     2326098 :   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
     283      775366 :   BFI.clear();
     284     1550732 :   BFI.Freqs = std::move(SavedFreqs);
     285      775366 : }
     286             : 
     287     1811248 : bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
     288             :                                            const LoopData *OuterLoop,
     289             :                                            const BlockNode &Pred,
     290             :                                            const BlockNode &Succ,
     291             :                                            uint64_t Weight) {
     292     1811248 :   if (!Weight)
     293        1329 :     Weight = 1;
     294             : 
     295             :   auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
     296     1811379 :     return OuterLoop && OuterLoop->isHeader(Node);
     297     1811248 :   };
     298             : 
     299     5433744 :   BlockNode Resolved = Working[Succ.Index].getResolvedNode();
     300             : 
     301             : #ifndef NDEBUG
     302             :   auto debugSuccessor = [&](const char *Type) {
     303             :     dbgs() << "  =>"
     304             :            << " [" << Type << "] weight = " << Weight;
     305             :     if (!isLoopHeader(Resolved))
     306             :       dbgs() << ", succ = " << getBlockName(Succ);
     307             :     if (Resolved != Succ)
     308             :       dbgs() << ", resolved = " << getBlockName(Resolved);
     309             :     dbgs() << "\n";
     310             :   };
     311             :   (void)debugSuccessor;
     312             : #endif
     313             : 
     314       50125 :   if (isLoopHeader(Resolved)) {
     315             :     DEBUG(debugSuccessor("backedge"));
     316       50125 :     Dist.addBackedge(Resolved, Weight);
     317       50125 :     return true;
     318             :   }
     319             : 
     320     3522246 :   if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
     321             :     DEBUG(debugSuccessor("  exit  "));
     322      115592 :     Dist.addExit(Resolved, Weight);
     323      115592 :     return true;
     324             :   }
     325             : 
     326     1645531 :   if (Resolved < Pred) {
     327             :     if (!isLoopHeader(Pred)) {
     328             :       // If OuterLoop is an irreducible loop, we can't actually handle this.
     329             :       assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
     330             :              "unhandled irreducible control flow");
     331             : 
     332             :       // Irreducible backedge.  Abort.
     333             :       DEBUG(debugSuccessor("abort!!!"));
     334             :       return false;
     335             :     }
     336             : 
     337             :     // If "Pred" is a loop header, then this isn't really a backedge; rather,
     338             :     // OuterLoop must be irreducible.  These false backedges can come only from
     339             :     // secondary loop headers.
     340             :     assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
     341             :            "unhandled irreducible control flow");
     342             :   }
     343             : 
     344             :   DEBUG(debugSuccessor(" local  "));
     345     1645413 :   Dist.addLocal(Resolved, Weight);
     346     1645413 :   return true;
     347             : }
     348             : 
     349       46907 : bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
     350             :     const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
     351             :   // Copy the exit map into Dist.
     352      255259 :   for (const auto &I : Loop.Exits)
     353      343662 :     if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
     354             :                    I.second.getMass()))
     355             :       // Irreducible backedge.
     356             :       return false;
     357             : 
     358             :   return true;
     359             : }
     360             : 
     361             : /// \brief Compute the loop scale for a loop.
     362       46836 : void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
     363             :   // Compute loop scale.
     364             :   DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
     365             : 
     366             :   // Infinite loops need special handling. If we give the back edge an infinite
     367             :   // mass, they may saturate all the other scales in the function down to 1,
     368             :   // making all the other region temperatures look exactly the same. Choose an
     369             :   // arbitrary scale to avoid these issues.
     370             :   //
     371             :   // FIXME: An alternate way would be to select a symbolic scale which is later
     372             :   // replaced to be the maximum of all computed scales plus 1. This would
     373             :   // appropriately describe the loop as having a large scale, without skewing
     374             :   // the final frequency computation.
     375       46836 :   const Scaled64 InfiniteLoopScale(1, 12);
     376             : 
     377             :   // LoopScale == 1 / ExitMass
     378             :   // ExitMass == HeadMass - BackedgeMass
     379       46836 :   BlockMass TotalBackedgeMass;
     380      187667 :   for (auto &Mass : Loop.BackedgeMass)
     381       47159 :     TotalBackedgeMass += Mass;
     382       46836 :   BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
     383             : 
     384             :   // Block scale stores the inverse of the scale. If this is an infinite loop,
     385             :   // its exit mass will be zero. In this case, use an arbitrary scale for the
     386             :   // loop scale.
     387       46836 :   Loop.Scale =
     388       93672 :       ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
     389             : 
     390             :   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
     391             :                << " - " << TotalBackedgeMass << ")\n"
     392             :                << " - scale = " << Loop.Scale << "\n");
     393       46836 : }
     394             : 
     395             : /// \brief Package up a loop.
     396       46836 : void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
     397             :   DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
     398             : 
     399             :   // Clear the subloop exits to prevent quadratic memory usage.
     400      356560 :   for (const BlockNode &M : Loop.Nodes) {
     401      438378 :     if (auto *Loop = Working[M.Index].getPackagedLoop())
     402        6274 :       Loop->Exits.clear();
     403             :     DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
     404             :   }
     405       46836 :   Loop.IsPackaged = true;
     406       46836 : }
     407             : 
     408             : #ifndef NDEBUG
     409             : static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
     410             :                         const DitheringDistributer &D, const BlockNode &T,
     411             :                         const BlockMass &M, const char *Desc) {
     412             :   dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
     413             :   if (Desc)
     414             :     dbgs() << " [" << Desc << "]";
     415             :   if (T.isValid())
     416             :     dbgs() << " to " << BFI.getBlockName(T);
     417             :   dbgs() << "\n";
     418             : }
     419             : #endif
     420             : 
     421     2039125 : void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
     422             :                                                 LoopData *OuterLoop,
     423             :                                                 Distribution &Dist) {
     424     4078250 :   BlockMass Mass = Working[Source.Index].getMass();
     425             :   DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
     426             : 
     427             :   // Distribute mass to successors as laid out in Dist.
     428     2039125 :   DitheringDistributer D(Dist, Mass);
     429             : 
     430     7915186 :   for (const Weight &W : Dist.Weights) {
     431             :     // Check for a local edge (non-backedge and non-exit).
     432     1797811 :     BlockMass Taken = D.takeMass(W.Amount);
     433     3431129 :     if (W.Type == Weight::Local) {
     434     4899954 :       Working[W.TargetNode.Index].getMass() += Taken;
     435             :       DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     436     3316728 :       continue;
     437             :     }
     438             : 
     439             :     // Backedges and exits only make sense if we're processing a loop.
     440             :     assert(OuterLoop && "backedge or exit outside of loop");
     441             : 
     442             :     // Check for a backedge.
     443      214585 :     if (W.Type == Weight::Backedge) {
     444      150276 :       OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
     445             :       DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
     446       50092 :       continue;
     447             :     }
     448             : 
     449             :     // This must be an exit.
     450             :     assert(W.Type == Weight::Exit);
     451      228802 :     OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
     452             :     DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
     453             :   }
     454     2039125 : }
     455             : 
     456      775366 : static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
     457             :                                      const Scaled64 &Min, const Scaled64 &Max) {
     458             :   // Scale the Factor to a size that creates integers.  Ideally, integers would
     459             :   // be scaled so that Max == UINT64_MAX so that they can be best
     460             :   // differentiated.  However, in the presence of large frequency values, small
     461             :   // frequencies are scaled down to 1, making it impossible to differentiate
     462             :   // small, unequal numbers. When the spread between Min and Max frequencies
     463             :   // fits well within MaxBits, we make the scale be at least 8.
     464      775366 :   const unsigned MaxBits = 64;
     465     1550732 :   const unsigned SpreadBits = (Max / Min).lg();
     466      775366 :   Scaled64 ScalingFactor;
     467      775366 :   if (SpreadBits <= MaxBits - 3) {
     468             :     // If the values are small enough, make the scaling factor at least 8 to
     469             :     // allow distinguishing small values.
     470      774775 :     ScalingFactor = Min.inverse();
     471             :     ScalingFactor <<= 3;
     472             :   } else {
     473             :     // If the values need more than MaxBits to be represented, saturate small
     474             :     // frequency values down to 1 by using a scaling factor that benefits large
     475             :     // frequency values.
     476        1182 :     ScalingFactor = Scaled64(1, MaxBits) / Max;
     477             :   }
     478             : 
     479             :   // Translate the floats to integers.
     480             :   DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
     481             :                << ", factor = " << ScalingFactor << "\n");
     482     7525250 :   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
     483     5974518 :     Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
     484     5974518 :     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
     485             :     DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
     486             :                  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
     487             :                  << ", int = " << BFI.Freqs[Index].Integer << "\n");
     488             :   }
     489      775366 : }
     490             : 
     491             : /// \brief Unwrap a loop package.
     492             : ///
     493             : /// Visits all the members of a loop, adjusting their BlockData according to
     494             : /// the loop's pseudo-node.
     495       46836 : static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
     496             :   DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
     497             :                << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
     498             :                << "\n");
     499       46836 :   Loop.Scale *= Loop.Mass.toScaled();
     500       46836 :   Loop.IsPackaged = false;
     501             :   DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
     502             : 
     503             :   // Propagate the head scale through the loop.  Since members are visited in
     504             :   // RPO, the head scale will be updated by the loop scale first, and then the
     505             :   // final head scale will be used for updated the rest of the members.
     506      356560 :   for (const BlockNode &N : Loop.Nodes) {
     507      432104 :     const auto &Working = BFI.Working[N.Index];
     508      222326 :     Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
     509      432104 :                                        : BFI.Freqs[N.Index].Scaled;
     510      432104 :     Scaled64 New = Loop.Scale * F;
     511             :     DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
     512             :                  << "\n");
     513      216052 :     F = New;
     514             :   }
     515       46836 : }
     516             : 
     517      775366 : void BlockFrequencyInfoImplBase::unwrapLoops() {
     518             :   // Set initial frequencies from loop-local masses.
     519     5533744 :   for (size_t Index = 0; Index < Working.size(); ++Index)
     520     5974518 :     Freqs[Index].Scaled = Working[Index].Mass.toScaled();
     521             : 
     522     2372934 :   for (LoopData &Loop : Loops)
     523       46836 :     unwrapLoop(*this, Loop);
     524      775366 : }
     525             : 
     526      775366 : void BlockFrequencyInfoImplBase::finalizeMetrics() {
     527             :   // Unwrap loop packages in reverse post-order, tracking min and max
     528             :   // frequencies.
     529      775366 :   auto Min = Scaled64::getLargest();
     530      775366 :   auto Max = Scaled64::getZero();
     531     5533742 :   for (size_t Index = 0; Index < Working.size(); ++Index) {
     532             :     // Update min/max scale.
     533     5974517 :     Min = std::min(Min, Freqs[Index].Scaled);
     534     5974515 :     Max = std::max(Max, Freqs[Index].Scaled);
     535             :   }
     536             : 
     537             :   // Convert to integers.
     538      775365 :   convertFloatingToInteger(*this, Min, Max);
     539             : 
     540             :   // Clean up data structures.
     541      775366 :   cleanup(*this);
     542             : 
     543             :   // Print out the final stats.
     544             :   DEBUG(dump());
     545      775366 : }
     546             : 
     547             : BlockFrequency
     548     6328317 : BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
     549     6328317 :   if (!Node.isValid())
     550       16252 :     return 0;
     551    12624130 :   return Freqs[Node.Index].Integer;
     552             : }
     553             : 
     554             : Optional<uint64_t>
     555         473 : BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
     556             :                                                  const BlockNode &Node) const {
     557         946 :   return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
     558             : }
     559             : 
     560             : Optional<uint64_t>
     561         543 : BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
     562             :                                                     uint64_t Freq) const {
     563        1086 :   auto EntryCount = F.getEntryCount();
     564         543 :   if (!EntryCount)
     565             :     return None;
     566             :   // Use 128 bit APInt to do the arithmetic to avoid overflow.
     567         418 :   APInt BlockCount(128, EntryCount.getValue());
     568         418 :   APInt BlockFreq(128, Freq);
     569         627 :   APInt EntryFreq(128, getEntryFreq());
     570         209 :   BlockCount *= BlockFreq;
     571         627 :   BlockCount = BlockCount.udiv(EntryFreq);
     572         418 :   return BlockCount.getLimitedValue();
     573             : }
     574             : 
     575             : Scaled64
     576         435 : BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
     577         435 :   if (!Node.isValid())
     578             :     return Scaled64::getZero();
     579         870 :   return Freqs[Node.Index].Scaled;
     580             : }
     581             : 
     582        1916 : void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
     583             :                                               uint64_t Freq) {
     584             :   assert(Node.isValid() && "Expected valid node");
     585             :   assert(Node.Index < Freqs.size() && "Expected legal index");
     586        3832 :   Freqs[Node.Index].Integer = Freq;
     587        1916 : }
     588             : 
     589             : std::string
     590           0 : BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
     591           0 :   return {};
     592             : }
     593             : 
     594             : std::string
     595           0 : BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
     596           0 :   return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
     597             : }
     598             : 
     599             : raw_ostream &
     600           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     601             :                                            const BlockNode &Node) const {
     602           0 :   return OS << getFloatingBlockFreq(Node);
     603             : }
     604             : 
     605             : raw_ostream &
     606           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     607             :                                            const BlockFrequency &Freq) const {
     608           0 :   Scaled64 Block(Freq.getFrequency(), 0);
     609           0 :   Scaled64 Entry(getEntryFreq(), 0);
     610             : 
     611           0 :   return OS << Block / Entry;
     612             : }
     613             : 
     614          29 : void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
     615          29 :   Start = OuterLoop.getHeader();
     616          58 :   Nodes.reserve(OuterLoop.Nodes.size());
     617         547 :   for (auto N : OuterLoop.Nodes)
     618         460 :     addNode(N);
     619          29 :   indexNodes();
     620          29 : }
     621             : 
     622          89 : void IrreducibleGraph::addNodesInFunction() {
     623          89 :   Start = 0;
     624        4238 :   for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
     625        6090 :     if (!BFI.Working[Index].isPackaged())
     626        1926 :       addNode(Index);
     627          89 :   indexNodes();
     628          89 : }
     629             : 
     630         118 : void IrreducibleGraph::indexNodes() {
     631        2858 :   for (auto &I : Nodes)
     632        4772 :     Lookup[I.Node.Index] = &I;
     633         118 : }
     634             : 
     635        4788 : void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
     636             :                                const BFIBase::LoopData *OuterLoop) {
     637        4788 :   if (OuterLoop && OuterLoop->isHeader(Succ))
     638         136 :     return;
     639        4729 :   auto L = Lookup.find(Succ.Index);
     640       14187 :   if (L == Lookup.end())
     641             :     return;
     642        4652 :   IrrNode &SuccIrr = *L->second;
     643        9304 :   Irr.Edges.push_back(&SuccIrr);
     644        9304 :   SuccIrr.Edges.push_front(&Irr);
     645        4652 :   ++SuccIrr.NumIn;
     646             : }
     647             : 
     648             : namespace llvm {
     649             : 
     650             : template <> struct GraphTraits<IrreducibleGraph> {
     651             :   using GraphT = bfi_detail::IrreducibleGraph;
     652             :   using NodeRef = const GraphT::IrrNode *;
     653             :   using ChildIteratorType = GraphT::IrrNode::iterator;
     654             : 
     655             :   static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
     656        2386 :   static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
     657        7038 :   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
     658             : };
     659             : 
     660             : } // end namespace llvm
     661             : 
     662             : /// \brief Find extra irreducible headers.
     663             : ///
     664             : /// Find entry blocks and other blocks with backedges, which exist when \c G
     665             : /// contains irreducible sub-SCCs.
     666         120 : static void findIrreducibleHeaders(
     667             :     const BlockFrequencyInfoImplBase &BFI,
     668             :     const IrreducibleGraph &G,
     669             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
     670             :     LoopData::NodeList &Headers, LoopData::NodeList &Others) {
     671             :   // Map from nodes in the SCC to whether it's an entry block.
     672         182 :   SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
     673             : 
     674             :   // InSCC also acts the set of nodes in the graph.  Seed it.
     675        1980 :   for (const auto *I : SCC)
     676        1500 :     InSCC[I] = false;
     677             : 
     678        1860 :   for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
     679        1500 :     auto &Irr = *I->first;
     680       10816 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     681        3671 :       if (InSCC.count(P))
     682             :         continue;
     683             : 
     684             :       // This is an entry block.
     685         355 :       I->second = true;
     686         355 :       Headers.push_back(Irr.Node);
     687             :       DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
     688             :       break;
     689             :     }
     690             :   }
     691             :   assert(Headers.size() >= 2 &&
     692             :          "Expected irreducible CFG; -loop-info is likely invalid");
     693         360 :   if (Headers.size() == InSCC.size()) {
     694             :     // Every block is a header.
     695         116 :     std::sort(Headers.begin(), Headers.end());
     696          58 :     return;
     697             :   }
     698             : 
     699             :   // Look for extra headers from irreducible sub-SCCs.
     700        1491 :   for (const auto &I : InSCC) {
     701             :     // Entry blocks are already headers.
     702        1305 :     if (I.second)
     703             :       continue;
     704             : 
     705        1145 :     auto &Irr = *I.first;
     706        7006 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     707             :       // Skip forward edges.
     708        2738 :       if (P->Node < Irr.Node)
     709             :         continue;
     710             : 
     711             :       // Skip predecessors from entry blocks.  These can have inverted
     712             :       // ordering.
     713         101 :       if (InSCC.lookup(P))
     714             :         continue;
     715             : 
     716             :       // Store the extra header.
     717          88 :       Headers.push_back(Irr.Node);
     718             :       DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
     719             :       break;
     720             :     }
     721        3435 :     if (Headers.back() == Irr.Node)
     722             :       // Added this as a header.
     723             :       continue;
     724             : 
     725             :     // This is not a header.
     726        1057 :     Others.push_back(Irr.Node);
     727             :     DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
     728             :   }
     729         248 :   std::sort(Headers.begin(), Headers.end());
     730         248 :   std::sort(Others.begin(), Others.end());
     731             : }
     732             : 
     733         120 : static void createIrreducibleLoop(
     734             :     BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
     735             :     LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
     736             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
     737             :   // Translate the SCC into RPO.
     738             :   DEBUG(dbgs() << " - found-scc\n");
     739             : 
     740         240 :   LoopData::NodeList Headers;
     741         240 :   LoopData::NodeList Others;
     742         120 :   findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
     743             : 
     744         240 :   auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
     745         840 :                                 Headers.end(), Others.begin(), Others.end());
     746             : 
     747             :   // Update loop hierarchy.
     748        1860 :   for (const auto &N : Loop->Nodes)
     749        3133 :     if (BFI.Working[N.Index].isLoopHeader())
     750         266 :       BFI.Working[N.Index].Loop->Parent = &*Loop;
     751             :     else
     752        2734 :       BFI.Working[N.Index].Loop = &*Loop;
     753         120 : }
     754             : 
     755             : iterator_range<std::list<LoopData>::iterator>
     756         118 : BlockFrequencyInfoImplBase::analyzeIrreducible(
     757             :     const IrreducibleGraph &G, LoopData *OuterLoop,
     758             :     std::list<LoopData>::iterator Insert) {
     759             :   assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
     760         296 :   auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
     761             : 
     762        1242 :   for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
     763        2012 :     if (I->size() < 2)
     764         886 :       continue;
     765             : 
     766             :     // Translate the SCC into RPO.
     767         120 :     createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
     768             :   }
     769             : 
     770         118 :   if (OuterLoop)
     771          29 :     return make_range(std::next(Prev), Insert);
     772          89 :   return make_range(Loops.begin(), Insert);
     773             : }
     774             : 
     775             : void
     776          29 : BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
     777          58 :   OuterLoop.Exits.clear();
     778         116 :   for (auto &Mass : OuterLoop.BackedgeMass)
     779          29 :     Mass = BlockMass::getEmpty();
     780          58 :   auto O = OuterLoop.Nodes.begin() + 1;
     781         489 :   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
     782        1293 :     if (!Working[I->Index].isPackaged())
     783         202 :       *O++ = *I;
     784          58 :   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
     785          29 : }
     786             : 
     787         120 : void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
     788             :   assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
     789             : 
     790             :   // Since the loop has more than one header block, the mass flowing back into
     791             :   // each header will be different. Adjust the mass in each header loop to
     792             :   // reflect the masses flowing through back edges.
     793             :   //
     794             :   // To do this, we distribute the initial mass using the backedge masses
     795             :   // as weights for the distribution.
     796         120 :   BlockMass LoopMass = BlockMass::getFull();
     797         240 :   Distribution Dist;
     798             : 
     799             :   DEBUG(dbgs() << "adjust-loop-header-mass:\n");
     800         563 :   for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
     801         886 :     auto &HeaderNode = Loop.Nodes[H];
     802         886 :     auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
     803             :     DEBUG(dbgs() << " - Add back edge mass for node "
     804             :                  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
     805         443 :     if (BackedgeMass.getMass() > 0)
     806         441 :       Dist.addLocal(HeaderNode, BackedgeMass.getMass());
     807             :     else
     808             :       DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
     809             :   }
     810             : 
     811         120 :   DitheringDistributer D(Dist, LoopMass);
     812             : 
     813             :   DEBUG(dbgs() << " Distribute loop mass " << LoopMass
     814             :                << " to headers using above weights\n");
     815         801 :   for (const Weight &W : Dist.Weights) {
     816         441 :     BlockMass Taken = D.takeMass(W.Amount);
     817             :     assert(W.Type == Weight::Local && "all weights should be local");
     818         882 :     Working[W.TargetNode.Index].getMass() = Taken;
     819             :     DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     820             :   }
     821         120 : }

Generated by: LCOV version 1.13