LCOV - code coverage report
Current view: top level - lib/Analysis - BlockFrequencyInfoImpl.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 214 269 79.6 %
Date: 2018-10-20 13:21:21 Functions: 32 41 78.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Loops should be simplified before this analysis.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
      15             : #include "llvm/ADT/APInt.h"
      16             : #include "llvm/ADT/DenseMap.h"
      17             : #include "llvm/ADT/GraphTraits.h"
      18             : #include "llvm/ADT/None.h"
      19             : #include "llvm/ADT/SCCIterator.h"
      20             : #include "llvm/Config/llvm-config.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include "llvm/Support/BlockFrequency.h"
      23             : #include "llvm/Support/BranchProbability.h"
      24             : #include "llvm/Support/Compiler.h"
      25             : #include "llvm/Support/Debug.h"
      26             : #include "llvm/Support/ScaledNumber.h"
      27             : #include "llvm/Support/MathExtras.h"
      28             : #include "llvm/Support/raw_ostream.h"
      29             : #include <algorithm>
      30             : #include <cassert>
      31             : #include <cstddef>
      32             : #include <cstdint>
      33             : #include <iterator>
      34             : #include <list>
      35             : #include <numeric>
      36             : #include <utility>
      37             : #include <vector>
      38             : 
      39             : using namespace llvm;
      40             : using namespace llvm::bfi_detail;
      41             : 
      42             : #define DEBUG_TYPE "block-freq"
      43             : 
      44     3794959 : ScaledNumber<uint64_t> BlockMass::toScaled() const {
      45     3794959 :   if (isFull())
      46     1534340 :     return ScaledNumber<uint64_t>(1, 0);
      47     2260619 :   return ScaledNumber<uint64_t>(getMass() + 1, -64);
      48             : }
      49             : 
      50             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      51             : LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
      52             : #endif
      53             : 
      54             : static char getHexDigit(int N) {
      55             :   assert(N < 16);
      56           0 :   if (N < 10)
      57           0 :     return '0' + N;
      58           0 :   return 'a' + N - 10;
      59             : }
      60             : 
      61           0 : raw_ostream &BlockMass::print(raw_ostream &OS) const {
      62           0 :   for (int Digits = 0; Digits < 16; ++Digits)
      63           0 :     OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
      64           0 :   return OS;
      65             : }
      66             : 
      67             : namespace {
      68             : 
      69             : using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
      70             : using Distribution = BlockFrequencyInfoImplBase::Distribution;
      71             : using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
      72             : using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
      73             : using LoopData = BlockFrequencyInfoImplBase::LoopData;
      74             : using Weight = BlockFrequencyInfoImplBase::Weight;
      75             : using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
      76             : 
      77             : /// Dithering mass distributer.
      78             : ///
      79             : /// This class splits up a single mass into portions by weight, dithering to
      80             : /// spread out error.  No mass is lost.  The dithering precision depends on the
      81             : /// precision of the product of \a BlockMass and \a BranchProbability.
      82             : ///
      83             : /// The distribution algorithm follows.
      84             : ///
      85             : ///  1. Initialize by saving the sum of the weights in \a RemWeight and the
      86             : ///     mass to distribute in \a RemMass.
      87             : ///
      88             : ///  2. For each portion:
      89             : ///
      90             : ///      1. Construct a branch probability, P, as the portion's weight divided
      91             : ///         by the current value of \a RemWeight.
      92             : ///      2. Calculate the portion's mass as \a RemMass times P.
      93             : ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
      94             : ///         the current portion's weight and mass.
      95             : struct DitheringDistributer {
      96             :   uint32_t RemWeight;
      97             :   BlockMass RemMass;
      98             : 
      99             :   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
     100             : 
     101             :   BlockMass takeMass(uint32_t Weight);
     102             : };
     103             : 
     104             : } // end anonymous namespace
     105             : 
     106           0 : DitheringDistributer::DitheringDistributer(Distribution &Dist,
     107         181 :                                            const BlockMass &Mass) {
     108     3726761 :   Dist.normalize();
     109     3726762 :   RemWeight = Dist.Total;
     110     3726762 :   RemMass = Mass;
     111           0 : }
     112             : 
     113     3376597 : BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
     114             :   assert(Weight && "invalid weight");
     115             :   assert(Weight <= RemWeight);
     116     3376597 :   BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
     117             : 
     118             :   // Decrement totals (dither).
     119     3376597 :   RemWeight -= Weight;
     120             :   RemMass -= Mass;
     121     3376597 :   return Mass;
     122             : }
     123             : 
     124     3404924 : void Distribution::add(const BlockNode &Node, uint64_t Amount,
     125             :                        Weight::DistType Type) {
     126             :   assert(Amount && "invalid weight of 0");
     127     3404924 :   uint64_t NewTotal = Total + Amount;
     128             : 
     129             :   // Check for overflow.  It should be impossible to overflow twice.
     130     3404924 :   bool IsOverflow = NewTotal < Total;
     131             :   assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
     132     3404924 :   DidOverflow |= IsOverflow;
     133             : 
     134             :   // Update the total.
     135     3404924 :   Total = NewTotal;
     136             : 
     137             :   // Save the weight.
     138     6809848 :   Weights.push_back(Weight(Type, Node, Amount));
     139     3404924 : }
     140             : 
     141             : static void combineWeight(Weight &W, const Weight &OtherW) {
     142             :   assert(OtherW.TargetNode.isValid());
     143       29115 :   if (!W.Amount) {
     144         817 :     W = OtherW;
     145             :     return;
     146             :   }
     147             :   assert(W.Type == OtherW.Type);
     148             :   assert(W.TargetNode == OtherW.TargetNode);
     149             :   assert(OtherW.Amount && "Expected non-zero weight");
     150       28298 :   if (W.Amount > W.Amount + OtherW.Amount)
     151             :     // Saturate on overflow.
     152           2 :     W.Amount = UINT64_MAX;
     153             :   else
     154       28296 :     W.Amount += OtherW.Amount;
     155             : }
     156             : 
     157     1142884 : static void combineWeightsBySorting(WeightList &Weights) {
     158             :   // Sort so edges to the same node are adjacent.
     159             :   llvm::sort(Weights, [](const Weight &L, const Weight &R) {
     160           0 :     return L.TargetNode < R.TargetNode;
     161             :   });
     162             : 
     163             :   // Combine adjacent edges.
     164             :   WeightList::iterator O = Weights.begin();
     165     3523762 :   for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
     166             :        ++O, (I = L)) {
     167     2380878 :     *O = *I;
     168             : 
     169             :     // Find the adjacent weights to the same node.
     170     2408810 :     for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
     171             :       combineWeight(*O, *L);
     172             :   }
     173             : 
     174             :   // Erase extra entries.
     175             :   Weights.erase(O, Weights.end());
     176     1142884 : }
     177             : 
     178           7 : static void combineWeightsByHashing(WeightList &Weights) {
     179             :   // Collect weights into a DenseMap.
     180             :   using HashTable = DenseMap<BlockNode::IndexType, Weight>;
     181             : 
     182          21 :   HashTable Combined(NextPowerOf2(2 * Weights.size()));
     183        1190 :   for (const Weight &W : Weights)
     184        1183 :     combineWeight(Combined[W.TargetNode.Index], W);
     185             : 
     186             :   // Check whether anything changed.
     187          14 :   if (Weights.size() == Combined.size())
     188             :     return;
     189             : 
     190             :   // Fill in the new weights.
     191             :   Weights.clear();
     192             :   Weights.reserve(Combined.size());
     193         338 :   for (const auto &I : Combined)
     194         334 :     Weights.push_back(I.second);
     195             : }
     196             : 
     197     1142891 : static void combineWeights(WeightList &Weights) {
     198             :   // Use a hash table for many successors to keep this linear.
     199     1142891 :   if (Weights.size() > 128) {
     200           7 :     combineWeightsByHashing(Weights);
     201           7 :     return;
     202             :   }
     203             : 
     204     1142884 :   combineWeightsBySorting(Weights);
     205             : }
     206             : 
     207             : static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
     208             :   assert(Shift >= 0);
     209             :   assert(Shift < 64);
     210             :   if (!Shift)
     211             :     return N;
     212      116133 :   return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
     213             : }
     214             : 
     215     3726764 : void Distribution::normalize() {
     216             :   // Early exit for termination nodes.
     217     3726764 :   if (Weights.empty())
     218             :     return;
     219             : 
     220             :   // Only bother if there are multiple successors.
     221     2137799 :   if (Weights.size() > 1)
     222     1142891 :     combineWeights(Weights);
     223             : 
     224             :   // Early exit when combined into a single successor.
     225     4275598 :   if (Weights.size() == 1) {
     226      998093 :     Total = 1;
     227      998093 :     Weights.front().Amount = 1;
     228      998093 :     return;
     229             :   }
     230             : 
     231             :   // Determine how much to shift right so that the total fits into 32-bits.
     232             :   //
     233             :   // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
     234             :   // for each weight can cause a 32-bit overflow.
     235             :   int Shift = 0;
     236     1139706 :   if (DidOverflow)
     237             :     Shift = 33;
     238     1139706 :   else if (Total > UINT32_MAX)
     239       21209 :     Shift = 33 - countLeadingZeros(Total);
     240             : 
     241             :   // Early exit if nothing needs to be scaled.
     242             :   if (!Shift) {
     243             :     // If we didn't overflow then combineWeights() shouldn't have changed the
     244             :     // sum of the weights, but let's double-check.
     245             :     assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
     246             :                                     [](uint64_t Sum, const Weight &W) {
     247             :                       return Sum + W.Amount;
     248             :                     }) &&
     249             :            "Expected total to be correct");
     250             :     return;
     251             :   }
     252             : 
     253             :   // Recompute the total through accumulation (rather than shifting it) so that
     254             :   // it's accurate after shifting and any changes combineWeights() made above.
     255       21209 :   Total = 0;
     256             : 
     257             :   // Sum the weights to each node and shift right if necessary.
     258      137342 :   for (Weight &W : Weights) {
     259             :     // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
     260             :     // can round here without concern about overflow.
     261             :     assert(W.TargetNode.isValid());
     262      116133 :     W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
     263             :     assert(W.Amount <= UINT32_MAX);
     264             : 
     265             :     // Update the total.
     266      116133 :     Total += W.Amount;
     267             :   }
     268             :   assert(Total <= UINT32_MAX);
     269             : }
     270             : 
     271     2628379 : void BlockFrequencyInfoImplBase::clear() {
     272             :   // Swap with a default-constructed std::vector, since std::vector<>::clear()
     273             :   // does not actually clear heap storage.
     274             :   std::vector<FrequencyData>().swap(Freqs);
     275             :   IsIrrLoopHeader.clear();
     276             :   std::vector<WorkingData>().swap(Working);
     277             :   Loops.clear();
     278     2628379 : }
     279             : 
     280             : /// Clear all memory not needed downstream.
     281             : ///
     282             : /// Releases all memory not used downstream.  In particular, saves Freqs.
     283     1314190 : static void cleanup(BlockFrequencyInfoImplBase &BFI) {
     284     1314190 :   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
     285     1314190 :   SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
     286     1314190 :   BFI.clear();
     287             :   BFI.Freqs = std::move(SavedFreqs);
     288     1314190 :   BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
     289     1314190 : }
     290             : 
     291     3403748 : bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
     292             :                                            const LoopData *OuterLoop,
     293             :                                            const BlockNode &Pred,
     294             :                                            const BlockNode &Succ,
     295             :                                            uint64_t Weight) {
     296     3403748 :   if (!Weight)
     297             :     Weight = 1;
     298             : 
     299             :   auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
     300      664680 :     return OuterLoop && OuterLoop->isHeader(Node);
     301             :   };
     302             : 
     303     6807496 :   BlockNode Resolved = Working[Succ.Index].getResolvedNode();
     304             : 
     305             : #ifndef NDEBUG
     306             :   auto debugSuccessor = [&](const char *Type) {
     307             :     dbgs() << "  =>"
     308             :            << " [" << Type << "] weight = " << Weight;
     309             :     if (!isLoopHeader(Resolved))
     310             :       dbgs() << ", succ = " << getBlockName(Succ);
     311             :     if (Resolved != Succ)
     312             :       dbgs() << ", resolved = " << getBlockName(Resolved);
     313             :     dbgs() << "\n";
     314             :   };
     315             :   (void)debugSuccessor;
     316             : #endif
     317             : 
     318             :   if (isLoopHeader(Resolved)) {
     319             :     LLVM_DEBUG(debugSuccessor("backedge"));
     320             :     Dist.addBackedge(Resolved, Weight);
     321       76810 :     return true;
     322             :   }
     323             : 
     324     6653876 :   if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
     325             :     LLVM_DEBUG(debugSuccessor("  exit  "));
     326             :     Dist.addExit(Resolved, Weight);
     327      190628 :     return true;
     328             :   }
     329             : 
     330     3136310 :   if (Resolved < Pred) {
     331             :     if (!isLoopHeader(Pred)) {
     332             :       // If OuterLoop is an irreducible loop, we can't actually handle this.
     333             :       assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
     334             :              "unhandled irreducible control flow");
     335             : 
     336             :       // Irreducible backedge.  Abort.
     337             :       LLVM_DEBUG(debugSuccessor("abort!!!"));
     338             :       return false;
     339             :     }
     340             : 
     341             :     // If "Pred" is a loop header, then this isn't really a backedge; rather,
     342             :     // OuterLoop must be irreducible.  These false backedges can come only from
     343             :     // secondary loop headers.
     344             :     assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
     345             :            "unhandled irreducible control flow");
     346             :   }
     347             : 
     348             :   LLVM_DEBUG(debugSuccessor(" local  "));
     349             :   Dist.addLocal(Resolved, Weight);
     350     3136126 :   return true;
     351             : }
     352             : 
     353       72271 : bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
     354             :     const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
     355             :   // Copy the exit map into Dist.
     356      259922 :   for (const auto &I : Loop.Exits)
     357      375364 :     if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
     358             :                    I.second.getMass()))
     359             :       // Irreducible backedge.
     360             :       return false;
     361             : 
     362             :   return true;
     363             : }
     364             : 
     365             : /// Compute the loop scale for a loop.
     366       72153 : void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
     367             :   // Compute loop scale.
     368             :   LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
     369             : 
     370             :   // Infinite loops need special handling. If we give the back edge an infinite
     371             :   // mass, they may saturate all the other scales in the function down to 1,
     372             :   // making all the other region temperatures look exactly the same. Choose an
     373             :   // arbitrary scale to avoid these issues.
     374             :   //
     375             :   // FIXME: An alternate way would be to select a symbolic scale which is later
     376             :   // replaced to be the maximum of all computed scales plus 1. This would
     377             :   // appropriately describe the loop as having a large scale, without skewing
     378             :   // the final frequency computation.
     379             :   const Scaled64 InfiniteLoopScale(1, 12);
     380             : 
     381             :   // LoopScale == 1 / ExitMass
     382             :   // ExitMass == HeadMass - BackedgeMass
     383             :   BlockMass TotalBackedgeMass;
     384      144805 :   for (auto &Mass : Loop.BackedgeMass)
     385             :     TotalBackedgeMass += Mass;
     386       72153 :   BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
     387             : 
     388             :   // Block scale stores the inverse of the scale. If this is an infinite loop,
     389             :   // its exit mass will be zero. In this case, use an arbitrary scale for the
     390             :   // loop scale.
     391       72153 :   Loop.Scale =
     392      141921 :       ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
     393             : 
     394             :   LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
     395             :                     << BlockMass::getFull() << " - " << TotalBackedgeMass
     396             :                     << ")\n"
     397             :                     << " - scale = " << Loop.Scale << "\n");
     398       72153 : }
     399             : 
     400             : /// Package up a loop.
     401       72153 : void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
     402             :   LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
     403             : 
     404             :   // Clear the subloop exits to prevent quadratic memory usage.
     405      449689 :   for (const BlockNode &M : Loop.Nodes) {
     406      755072 :     if (auto *Loop = Working[M.Index].getPackagedLoop())
     407             :       Loop->Exits.clear();
     408             :     LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
     409             :   }
     410       72153 :   Loop.IsPackaged = true;
     411       72153 : }
     412             : 
     413             : #ifndef NDEBUG
     414             : static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
     415             :                         const DitheringDistributer &D, const BlockNode &T,
     416             :                         const BlockMass &M, const char *Desc) {
     417             :   dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
     418             :   if (Desc)
     419             :     dbgs() << " [" << Desc << "]";
     420             :   if (T.isValid())
     421             :     dbgs() << " to " << BFI.getBlockName(T);
     422             :   dbgs() << "\n";
     423             : }
     424             : #endif
     425             : 
     426     3726392 : void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
     427             :                                                 LoopData *OuterLoop,
     428             :                                                 Distribution &Dist) {
     429     7452784 :   BlockMass Mass = Working[Source.Index].getMass();
     430             :   LLVM_DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
     431             : 
     432             :   // Distribute mass to successors as laid out in Dist.
     433             :   DitheringDistributer D(Dist, Mass);
     434             : 
     435     7101637 :   for (const Weight &W : Dist.Weights) {
     436             :     // Check for a local edge (non-backedge and non-exit).
     437     3375243 :     BlockMass Taken = D.takeMass(W.Amount);
     438     3375243 :     if (W.Type == Weight::Local) {
     439     6222076 :       Working[W.TargetNode.Index].getMass() += Taken;
     440             :       LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     441     3187800 :       continue;
     442             :     }
     443             : 
     444             :     // Backedges and exits only make sense if we're processing a loop.
     445             :     assert(OuterLoop && "backedge or exit outside of loop");
     446             : 
     447             :     // Check for a backedge.
     448      264205 :     if (W.Type == Weight::Backedge) {
     449       76762 :       OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
     450             :       LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
     451       76762 :       continue;
     452             :     }
     453             : 
     454             :     // This must be an exit.
     455             :     assert(W.Type == Weight::Exit);
     456      374886 :     OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
     457             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
     458             :   }
     459     3726394 : }
     460             : 
     461     1314190 : static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
     462             :                                      const Scaled64 &Min, const Scaled64 &Max) {
     463             :   // Scale the Factor to a size that creates integers.  Ideally, integers would
     464             :   // be scaled so that Max == UINT64_MAX so that they can be best
     465             :   // differentiated.  However, in the presence of large frequency values, small
     466             :   // frequencies are scaled down to 1, making it impossible to differentiate
     467             :   // small, unequal numbers. When the spread between Min and Max frequencies
     468             :   // fits well within MaxBits, we make the scale be at least 8.
     469             :   const unsigned MaxBits = 64;
     470     1314189 :   const unsigned SpreadBits = (Max / Min).lg();
     471     1314189 :   Scaled64 ScalingFactor;
     472     1314189 :   if (SpreadBits <= MaxBits - 3) {
     473             :     // If the values are small enough, make the scaling factor at least 8 to
     474             :     // allow distinguishing small values.
     475     1312317 :     ScalingFactor = Min.inverse();
     476             :     ScalingFactor <<= 3;
     477             :   } else {
     478             :     // If the values need more than MaxBits to be represented, saturate small
     479             :     // frequency values down to 1 by using a scaling factor that benefits large
     480             :     // frequency values.
     481        1873 :     ScalingFactor = Scaled64(1, MaxBits) / Max;
     482             :   }
     483             : 
     484             :   // Translate the floats to integers.
     485             :   LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
     486             :                     << ", factor = " << ScalingFactor << "\n");
     487     9934456 :   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
     488     3653038 :     Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
     489    10950508 :     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
     490             :     LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
     491             :                       << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
     492             :                       << ", int = " << BFI.Freqs[Index].Integer << "\n");
     493             :   }
     494     1314190 : }
     495             : 
     496             : /// Unwrap a loop package.
     497             : ///
     498             : /// Visits all the members of a loop, adjusting their BlockData according to
     499             : /// the loop's pseudo-node.
     500       72153 : static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
     501             :   LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
     502             :                     << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
     503             :                     << "\n");
     504       72153 :   Loop.Scale *= Loop.Mass.toScaled();
     505       72153 :   Loop.IsPackaged = false;
     506             :   LLVM_DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
     507             : 
     508             :   // Propagate the head scale through the loop.  Since members are visited in
     509             :   // RPO, the head scale will be updated by the loop scale first, and then the
     510             :   // final head scale will be used for updated the rest of the members.
     511      449689 :   for (const BlockNode &N : Loop.Nodes) {
     512      377536 :     const auto &Working = BFI.Working[N.Index];
     513      389179 :     Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
     514      389179 :                                        : BFI.Freqs[N.Index].Scaled;
     515      377536 :     Scaled64 New = Loop.Scale * F;
     516             :     LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
     517             :                       << New << "\n");
     518      377536 :     F = New;
     519             :   }
     520       72153 : }
     521             : 
     522     1314190 : void BlockFrequencyInfoImplBase::unwrapLoops() {
     523             :   // Set initial frequencies from loop-local masses.
     524     9934454 :   for (size_t Index = 0; Index < Working.size(); ++Index)
     525     3653038 :     Freqs[Index].Scaled = Working[Index].Mass.toScaled();
     526             : 
     527     1386342 :   for (LoopData &Loop : Loops)
     528       72153 :     unwrapLoop(*this, Loop);
     529     1314189 : }
     530             : 
     531     1314189 : void BlockFrequencyInfoImplBase::finalizeMetrics() {
     532             :   // Unwrap loop packages in reverse post-order, tracking min and max
     533             :   // frequencies.
     534     1314189 :   auto Min = Scaled64::getLargest();
     535     1314189 :   auto Max = Scaled64::getZero();
     536     9934454 :   for (size_t Index = 0; Index < Working.size(); ++Index) {
     537             :     // Update min/max scale.
     538     7306074 :     Min = std::min(Min, Freqs[Index].Scaled);
     539    10959114 :     Max = std::max(Max, Freqs[Index].Scaled);
     540             :   }
     541             : 
     542             :   // Convert to integers.
     543     1314190 :   convertFloatingToInteger(*this, Min, Max);
     544             : 
     545             :   // Clean up data structures.
     546     1314190 :   cleanup(*this);
     547             : 
     548             :   // Print out the final stats.
     549             :   LLVM_DEBUG(dump());
     550     1314190 : }
     551             : 
     552             : BlockFrequency
     553     8757329 : BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
     554     8757329 :   if (!Node.isValid())
     555       99463 :     return 0;
     556    17315732 :   return Freqs[Node.Index].Integer;
     557             : }
     558             : 
     559             : Optional<uint64_t>
     560        1581 : BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
     561             :                                                  const BlockNode &Node) const {
     562        1581 :   return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
     563             : }
     564             : 
     565             : Optional<uint64_t>
     566        1673 : BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
     567             :                                                     uint64_t Freq) const {
     568        1673 :   auto EntryCount = F.getEntryCount();
     569        1673 :   if (!EntryCount)
     570             :     return None;
     571             :   // Use 128 bit APInt to do the arithmetic to avoid overflow.
     572         556 :   APInt BlockCount(128, EntryCount.getCount());
     573             :   APInt BlockFreq(128, Freq);
     574             :   APInt EntryFreq(128, getEntryFreq());
     575         556 :   BlockCount *= BlockFreq;
     576             :   // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
     577             :   // lshr by 1 gives EntryFreq/2.
     578        2224 :   BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
     579             :   return BlockCount.getLimitedValue();
     580             : }
     581             : 
     582             : bool
     583         272 : BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
     584         272 :   if (!Node.isValid())
     585             :     return false;
     586         266 :   return IsIrrLoopHeader.test(Node.Index);
     587             : }
     588             : 
     589             : Scaled64
     590         532 : BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
     591         532 :   if (!Node.isValid())
     592             :     return Scaled64::getZero();
     593        1064 :   return Freqs[Node.Index].Scaled;
     594             : }
     595             : 
     596        2136 : void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
     597             :                                               uint64_t Freq) {
     598             :   assert(Node.isValid() && "Expected valid node");
     599             :   assert(Node.Index < Freqs.size() && "Expected legal index");
     600        4272 :   Freqs[Node.Index].Integer = Freq;
     601        2136 : }
     602             : 
     603             : std::string
     604           0 : BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
     605           0 :   return {};
     606             : }
     607             : 
     608             : std::string
     609           0 : BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
     610           0 :   return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
     611             : }
     612             : 
     613             : raw_ostream &
     614           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     615             :                                            const BlockNode &Node) const {
     616           0 :   return OS << getFloatingBlockFreq(Node);
     617             : }
     618             : 
     619             : raw_ostream &
     620           0 : BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
     621             :                                            const BlockFrequency &Freq) const {
     622           0 :   Scaled64 Block(Freq.getFrequency(), 0);
     623             :   Scaled64 Entry(getEntryFreq(), 0);
     624             : 
     625           0 :   return OS << Block / Entry;
     626             : }
     627             : 
     628          51 : void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
     629          51 :   Start = OuterLoop.getHeader();
     630         102 :   Nodes.reserve(OuterLoop.Nodes.size());
     631         800 :   for (auto N : OuterLoop.Nodes)
     632         749 :     addNode(N);
     633          51 :   indexNodes();
     634          51 : }
     635             : 
     636         133 : void IrreducibleGraph::addNodesInFunction() {
     637         133 :   Start = 0;
     638        3015 :   for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
     639        2749 :     if (!BFI.Working[Index].isPackaged())
     640        2546 :       addNode(Index);
     641         133 :   indexNodes();
     642         133 : }
     643             : 
     644         184 : void IrreducibleGraph::indexNodes() {
     645        3479 :   for (auto &I : Nodes)
     646        3295 :     Lookup[I.Node.Index] = &I;
     647         184 : }
     648             : 
     649        6736 : void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
     650             :                                const BFIBase::LoopData *OuterLoop) {
     651        6736 :   if (OuterLoop && OuterLoop->isHeader(Succ))
     652         212 :     return;
     653        6655 :   auto L = Lookup.find(Succ.Index);
     654        6655 :   if (L == Lookup.end())
     655             :     return;
     656        6524 :   IrrNode &SuccIrr = *L->second;
     657        6524 :   Irr.Edges.push_back(&SuccIrr);
     658        6524 :   SuccIrr.Edges.push_front(&Irr);
     659        6524 :   ++SuccIrr.NumIn;
     660             : }
     661             : 
     662             : namespace llvm {
     663             : 
     664             : template <> struct GraphTraits<IrreducibleGraph> {
     665             :   using GraphT = bfi_detail::IrreducibleGraph;
     666             :   using NodeRef = const GraphT::IrrNode *;
     667             :   using ChildIteratorType = GraphT::IrrNode::iterator;
     668             : 
     669           0 :   static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
     670        3295 :   static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
     671             :   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
     672             : };
     673             : 
     674             : } // end namespace llvm
     675             : 
     676             : /// Find extra irreducible headers.
     677             : ///
     678             : /// Find entry blocks and other blocks with backedges, which exist when \c G
     679             : /// contains irreducible sub-SCCs.
     680           0 : static void findIrreducibleHeaders(
     681             :     const BlockFrequencyInfoImplBase &BFI,
     682             :     const IrreducibleGraph &G,
     683             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
     684             :     LoopData::NodeList &Headers, LoopData::NodeList &Others) {
     685             :   // Map from nodes in the SCC to whether it's an entry block.
     686             :   SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
     687             : 
     688             :   // InSCC also acts the set of nodes in the graph.  Seed it.
     689           0 :   for (const auto *I : SCC)
     690           0 :     InSCC[I] = false;
     691             : 
     692           0 :   for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
     693           0 :     auto &Irr = *I->first;
     694           0 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     695           0 :       if (InSCC.count(P))
     696             :         continue;
     697             : 
     698             :       // This is an entry block.
     699           0 :       I->second = true;
     700           0 :       Headers.push_back(Irr.Node);
     701             :       LLVM_DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node)
     702             :                         << "\n");
     703           0 :       break;
     704             :     }
     705             :   }
     706             :   assert(Headers.size() >= 2 &&
     707             :          "Expected irreducible CFG; -loop-info is likely invalid");
     708           0 :   if (Headers.size() == InSCC.size()) {
     709             :     // Every block is a header.
     710             :     llvm::sort(Headers);
     711           0 :     return;
     712             :   }
     713             : 
     714             :   // Look for extra headers from irreducible sub-SCCs.
     715           0 :   for (const auto &I : InSCC) {
     716             :     // Entry blocks are already headers.
     717           0 :     if (I.second)
     718           0 :       continue;
     719             : 
     720           0 :     auto &Irr = *I.first;
     721           0 :     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
     722             :       // Skip forward edges.
     723           0 :       if (P->Node < Irr.Node)
     724           0 :         continue;
     725             : 
     726             :       // Skip predecessors from entry blocks.  These can have inverted
     727             :       // ordering.
     728           0 :       if (InSCC.lookup(P))
     729           0 :         continue;
     730             : 
     731             :       // Store the extra header.
     732           0 :       Headers.push_back(Irr.Node);
     733             :       LLVM_DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node)
     734             :                         << "\n");
     735           0 :       break;
     736             :     }
     737           0 :     if (Headers.back() == Irr.Node)
     738             :       // Added this as a header.
     739           0 :       continue;
     740             : 
     741             :     // This is not a header.
     742           0 :     Others.push_back(Irr.Node);
     743             :     LLVM_DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
     744             :   }
     745             :   llvm::sort(Headers);
     746             :   llvm::sort(Others);
     747             : }
     748             : 
     749           0 : static void createIrreducibleLoop(
     750             :     BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
     751             :     LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
     752             :     const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
     753             :   // Translate the SCC into RPO.
     754             :   LLVM_DEBUG(dbgs() << " - found-scc\n");
     755             : 
     756             :   LoopData::NodeList Headers;
     757             :   LoopData::NodeList Others;
     758           0 :   findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
     759             : 
     760           0 :   auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
     761           0 :                                 Headers.end(), Others.begin(), Others.end());
     762             : 
     763             :   // Update loop hierarchy.
     764           0 :   for (const auto &N : Loop->Nodes)
     765           0 :     if (BFI.Working[N.Index].isLoopHeader())
     766           0 :       BFI.Working[N.Index].Loop->Parent = &*Loop;
     767             :     else
     768           0 :       BFI.Working[N.Index].Loop = &*Loop;
     769           0 : }
     770             : 
     771             : iterator_range<std::list<LoopData>::iterator>
     772         184 : BlockFrequencyInfoImplBase::analyzeIrreducible(
     773             :     const IrreducibleGraph &G, LoopData *OuterLoop,
     774             :     std::list<LoopData>::iterator Insert) {
     775             :   assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
     776         184 :   auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
     777             : 
     778        1583 :   for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
     779        1399 :     if (I->size() < 2)
     780             :       continue;
     781             : 
     782             :     // Translate the SCC into RPO.
     783         187 :     createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
     784             :   }
     785             : 
     786         184 :   if (OuterLoop)
     787          51 :     return make_range(std::next(Prev), Insert);
     788         133 :   return make_range(Loops.begin(), Insert);
     789             : }
     790             : 
     791             : void
     792          51 : BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
     793             :   OuterLoop.Exits.clear();
     794         102 :   for (auto &Mass : OuterLoop.BackedgeMass)
     795          51 :     Mass = BlockMass::getEmpty();
     796          51 :   auto O = OuterLoop.Nodes.begin() + 1;
     797         749 :   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
     798        1396 :     if (!Working[I->Index].isPackaged())
     799         316 :       *O++ = *I;
     800             :   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
     801          51 : }
     802             : 
     803         181 : void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
     804             :   assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
     805             : 
     806             :   // Since the loop has more than one header block, the mass flowing back into
     807             :   // each header will be different. Adjust the mass in each header loop to
     808             :   // reflect the masses flowing through back edges.
     809             :   //
     810             :   // To do this, we distribute the initial mass using the backedge masses
     811             :   // as weights for the distribution.
     812             :   BlockMass LoopMass = BlockMass::getFull();
     813             :   Distribution Dist;
     814             : 
     815             :   LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
     816         849 :   for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
     817         668 :     auto &HeaderNode = Loop.Nodes[H];
     818         668 :     auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
     819             :     LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
     820             :                       << getBlockName(HeaderNode) << ": " << BackedgeMass
     821             :                       << "\n");
     822         668 :     if (BackedgeMass.getMass() > 0)
     823             :       Dist.addLocal(HeaderNode, BackedgeMass.getMass());
     824             :     else
     825             :       LLVM_DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
     826             :   }
     827             : 
     828             :   DitheringDistributer D(Dist, LoopMass);
     829             : 
     830             :   LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
     831             :                     << " to headers using above weights\n");
     832         849 :   for (const Weight &W : Dist.Weights) {
     833         668 :     BlockMass Taken = D.takeMass(W.Amount);
     834             :     assert(W.Type == Weight::Local && "all weights should be local");
     835        1336 :     Working[W.TargetNode.Index].getMass() = Taken;
     836             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     837             :   }
     838         181 : }
     839             : 
     840         187 : void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
     841             :   BlockMass LoopMass = BlockMass::getFull();
     842             :   DitheringDistributer D(Dist, LoopMass);
     843         873 :   for (const Weight &W : Dist.Weights) {
     844         686 :     BlockMass Taken = D.takeMass(W.Amount);
     845             :     assert(W.Type == Weight::Local && "all weights should be local");
     846        1372 :     Working[W.TargetNode.Index].getMass() = Taken;
     847             :     LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
     848             :   }
     849         187 : }

Generated by: LCOV version 1.13