LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineOutliner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 306 315 97.1 %
Date: 2018-07-13 00:08:38 Functions: 31 31 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : ///
      10             : /// \file
      11             : /// Replaces repeated sequences of instructions with function calls.
      12             : ///
      13             : /// This works by placing every instruction from every basic block in a
      14             : /// suffix tree, and repeatedly querying that tree for repeated sequences of
      15             : /// instructions. If a sequence of instructions appears often, then it ought
      16             : /// to be beneficial to pull out into a function.
      17             : ///
      18             : /// The MachineOutliner communicates with a given target using hooks defined in
      19             : /// TargetInstrInfo.h. The target supplies the outliner with information on how
      20             : /// a specific sequence of instructions should be outlined. This information
      21             : /// is used to deduce the number of instructions necessary to
      22             : ///
      23             : /// * Create an outlined function
      24             : /// * Call that outlined function
      25             : ///
      26             : /// Targets must implement
      27             : ///   * getOutliningCandidateInfo
      28             : ///   * buildOutlinedFrame
      29             : ///   * insertOutlinedCall
      30             : ///   * isFunctionSafeToOutlineFrom
      31             : ///
      32             : /// in order to make use of the MachineOutliner.
      33             : ///
      34             : /// This was originally presented at the 2016 LLVM Developers' Meeting in the
      35             : /// talk "Reducing Code Size Using Outlining". For a high-level overview of
      36             : /// how this pass works, the talk is available on YouTube at
      37             : ///
      38             : /// https://www.youtube.com/watch?v=yorld-WSOeU
      39             : ///
      40             : /// The slides for the talk are available at
      41             : ///
      42             : /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
      43             : ///
      44             : /// The talk provides an overview of how the outliner finds candidates and
      45             : /// ultimately outlines them. It describes how the main data structure for this
      46             : /// pass, the suffix tree, is queried and purged for candidates. It also gives
      47             : /// a simplified suffix tree construction algorithm for suffix trees based off
      48             : /// of the algorithm actually used here, Ukkonen's algorithm.
      49             : ///
      50             : /// For the original RFC for this pass, please see
      51             : ///
      52             : /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
      53             : ///
      54             : /// For more information on the suffix tree data structure, please see
      55             : /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
      56             : ///
      57             : //===----------------------------------------------------------------------===//
      58             : #include "llvm/CodeGen/MachineOutliner.h"
      59             : #include "llvm/ADT/DenseMap.h"
      60             : #include "llvm/ADT/Statistic.h"
      61             : #include "llvm/ADT/Twine.h"
      62             : #include "llvm/CodeGen/MachineFunction.h"
      63             : #include "llvm/CodeGen/MachineModuleInfo.h"
      64             : #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
      65             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      66             : #include "llvm/CodeGen/Passes.h"
      67             : #include "llvm/CodeGen/TargetInstrInfo.h"
      68             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      69             : #include "llvm/IR/DIBuilder.h"
      70             : #include "llvm/IR/IRBuilder.h"
      71             : #include "llvm/IR/Mangler.h"
      72             : #include "llvm/Support/Allocator.h"
      73             : #include "llvm/Support/CommandLine.h"
      74             : #include "llvm/Support/Debug.h"
      75             : #include "llvm/Support/raw_ostream.h"
      76             : #include <functional>
      77             : #include <map>
      78             : #include <sstream>
      79             : #include <tuple>
      80             : #include <vector>
      81             : 
      82             : #define DEBUG_TYPE "machine-outliner"
      83             : 
      84             : using namespace llvm;
      85             : using namespace ore;
      86             : using namespace outliner;
      87             : 
      88             : STATISTIC(NumOutlined, "Number of candidates outlined");
      89             : STATISTIC(FunctionsCreated, "Number of functions created");
      90             : 
      91             : // Set to true if the user wants the outliner to run on linkonceodr linkage
      92             : // functions. This is false by default because the linker can dedupe linkonceodr
      93             : // functions. Since the outliner is confined to a single module (modulo LTO),
      94             : // this is off by default. It should, however, be the default behaviour in
      95             : // LTO.
      96       99743 : static cl::opt<bool> EnableLinkOnceODROutlining(
      97             :     "enable-linkonceodr-outlining",
      98             :     cl::Hidden,
      99       99743 :     cl::desc("Enable the machine outliner on linkonceodr functions"),
     100      299229 :     cl::init(false));
     101             : 
     102             : namespace {
     103             : 
     104             : /// Represents an undefined index in the suffix tree.
     105             : const unsigned EmptyIdx = -1;
     106             : 
     107             : /// A node in a suffix tree which represents a substring or suffix.
     108             : ///
     109             : /// Each node has either no children or at least two children, with the root
     110             : /// being a exception in the empty tree.
     111             : ///
     112             : /// Children are represented as a map between unsigned integers and nodes. If
     113             : /// a node N has a child M on unsigned integer k, then the mapping represented
     114             : /// by N is a proper prefix of the mapping represented by M. Note that this,
     115             : /// although similar to a trie is somewhat different: each node stores a full
     116             : /// substring of the full mapping rather than a single character state.
     117             : ///
     118             : /// Each internal node contains a pointer to the internal node representing
     119             : /// the same string, but with the first character chopped off. This is stored
     120             : /// in \p Link. Each leaf node stores the start index of its respective
     121             : /// suffix in \p SuffixIdx.
     122        1087 : struct SuffixTreeNode {
     123             : 
     124             :   /// The children of this node.
     125             :   ///
     126             :   /// A child existing on an unsigned integer implies that from the mapping
     127             :   /// represented by the current node, there is a way to reach another
     128             :   /// mapping by tacking that character on the end of the current string.
     129             :   DenseMap<unsigned, SuffixTreeNode *> Children;
     130             : 
     131             :   /// A flag set to false if the node has been pruned from the tree.
     132             :   bool IsInTree = true;
     133             : 
     134             :   /// The start index of this node's substring in the main string.
     135             :   unsigned StartIdx = EmptyIdx;
     136             : 
     137             :   /// The end index of this node's substring in the main string.
     138             :   ///
     139             :   /// Every leaf node must have its \p EndIdx incremented at the end of every
     140             :   /// step in the construction algorithm. To avoid having to update O(N)
     141             :   /// nodes individually at the end of every step, the end index is stored
     142             :   /// as a pointer.
     143             :   unsigned *EndIdx = nullptr;
     144             : 
     145             :   /// For leaves, the start index of the suffix represented by this node.
     146             :   ///
     147             :   /// For all other nodes, this is ignored.
     148             :   unsigned SuffixIdx = EmptyIdx;
     149             : 
     150             :   /// For internal nodes, a pointer to the internal node representing
     151             :   /// the same sequence with the first character chopped off.
     152             :   ///
     153             :   /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
     154             :   /// Ukkonen's algorithm does to achieve linear-time construction is
     155             :   /// keep track of which node the next insert should be at. This makes each
     156             :   /// insert O(1), and there are a total of O(N) inserts. The suffix link
     157             :   /// helps with inserting children of internal nodes.
     158             :   ///
     159             :   /// Say we add a child to an internal node with associated mapping S. The
     160             :   /// next insertion must be at the node representing S - its first character.
     161             :   /// This is given by the way that we iteratively build the tree in Ukkonen's
     162             :   /// algorithm. The main idea is to look at the suffixes of each prefix in the
     163             :   /// string, starting with the longest suffix of the prefix, and ending with
     164             :   /// the shortest. Therefore, if we keep pointers between such nodes, we can
     165             :   /// move to the next insertion point in O(1) time. If we don't, then we'd
     166             :   /// have to query from the root, which takes O(N) time. This would make the
     167             :   /// construction algorithm O(N^2) rather than O(N).
     168             :   SuffixTreeNode *Link = nullptr;
     169             : 
     170             :   /// The parent of this node. Every node except for the root has a parent.
     171             :   SuffixTreeNode *Parent = nullptr;
     172             : 
     173             :   /// The number of times this node's string appears in the tree.
     174             :   ///
     175             :   /// This is equal to the number of leaf children of the string. It represents
     176             :   /// the number of suffixes that the node's string is a prefix of.
     177             :   unsigned OccurrenceCount = 0;
     178             : 
     179             :   /// The length of the string formed by concatenating the edge labels from the
     180             :   /// root to this node.
     181             :   unsigned ConcatLen = 0;
     182             : 
     183             :   /// Returns true if this node is a leaf.
     184             :   bool isLeaf() const { return SuffixIdx != EmptyIdx; }
     185             : 
     186             :   /// Returns true if this node is the root of its owning \p SuffixTree.
     187             :   bool isRoot() const { return StartIdx == EmptyIdx; }
     188             : 
     189             :   /// Return the number of elements in the substring associated with this node.
     190             :   size_t size() const {
     191             : 
     192             :     // Is it the root? If so, it's the empty string so return 0.
     193        1910 :     if (isRoot())
     194             :       return 0;
     195             : 
     196             :     assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
     197             : 
     198             :     // Size = the number of elements in the string.
     199             :     // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
     200        2977 :     return *EndIdx - StartIdx + 1;
     201             :   }
     202             : 
     203             :   SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
     204             :                  SuffixTreeNode *Parent)
     205        2174 :       : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
     206             : 
     207             :   SuffixTreeNode() {}
     208             : };
     209             : 
     210             : /// A data structure for fast substring queries.
     211             : ///
     212             : /// Suffix trees represent the suffixes of their input strings in their leaves.
     213             : /// A suffix tree is a type of compressed trie structure where each node
     214             : /// represents an entire substring rather than a single character. Each leaf
     215             : /// of the tree is a suffix.
     216             : ///
     217             : /// A suffix tree can be seen as a type of state machine where each state is a
     218             : /// substring of the full string. The tree is structured so that, for a string
     219             : /// of length N, there are exactly N leaves in the tree. This structure allows
     220             : /// us to quickly find repeated substrings of the input string.
     221             : ///
     222             : /// In this implementation, a "string" is a vector of unsigned integers.
     223             : /// These integers may result from hashing some data type. A suffix tree can
     224             : /// contain 1 or many strings, which can then be queried as one large string.
     225             : ///
     226             : /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
     227             : /// suffix tree construction. Ukkonen's algorithm is explained in more detail
     228             : /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
     229             : /// paper is available at
     230             : ///
     231             : /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
     232          40 : class SuffixTree {
     233             : public:
     234             :   /// Stores each leaf node in the tree.
     235             :   ///
     236             :   /// This is used for finding outlining candidates.
     237             :   std::vector<SuffixTreeNode *> LeafVector;
     238             : 
     239             :   /// Each element is an integer representing an instruction in the module.
     240             :   ArrayRef<unsigned> Str;
     241             : 
     242             : private:
     243             :   /// Maintains each node in the tree.
     244             :   SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
     245             : 
     246             :   /// The root of the suffix tree.
     247             :   ///
     248             :   /// The root represents the empty string. It is maintained by the
     249             :   /// \p NodeAllocator like every other node in the tree.
     250             :   SuffixTreeNode *Root = nullptr;
     251             : 
     252             :   /// Maintains the end indices of the internal nodes in the tree.
     253             :   ///
     254             :   /// Each internal node is guaranteed to never have its end index change
     255             :   /// during the construction algorithm; however, leaves must be updated at
     256             :   /// every step. Therefore, we need to store leaf end indices by reference
     257             :   /// to avoid updating O(N) leaves at every step of construction. Thus,
     258             :   /// every internal node must be allocated its own end index.
     259             :   BumpPtrAllocator InternalEndIdxAllocator;
     260             : 
     261             :   /// The end index of each leaf in the tree.
     262             :   unsigned LeafEndIdx = -1;
     263             : 
     264             :   /// Helper struct which keeps track of the next insertion point in
     265             :   /// Ukkonen's algorithm.
     266          20 :   struct ActiveState {
     267             :     /// The next node to insert at.
     268             :     SuffixTreeNode *Node;
     269             : 
     270             :     /// The index of the first character in the substring currently being added.
     271             :     unsigned Idx = EmptyIdx;
     272             : 
     273             :     /// The length of the substring we have to add at the current step.
     274             :     unsigned Len = 0;
     275             :   };
     276             : 
     277             :   /// The point the next insertion will take place at in the
     278             :   /// construction algorithm.
     279             :   ActiveState Active;
     280             : 
     281             :   /// Allocate a leaf node and add it to the tree.
     282             :   ///
     283             :   /// \param Parent The parent of this node.
     284             :   /// \param StartIdx The start index of this node's associated string.
     285             :   /// \param Edge The label on the edge leaving \p Parent to this node.
     286             :   ///
     287             :   /// \returns A pointer to the allocated leaf node.
     288         861 :   SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
     289             :                              unsigned Edge) {
     290             : 
     291             :     assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
     292             : 
     293             :     SuffixTreeNode *N = new (NodeAllocator.Allocate())
     294         861 :         SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
     295        1722 :     Parent.Children[Edge] = N;
     296             : 
     297         861 :     return N;
     298             :   }
     299             : 
     300             :   /// Allocate an internal node and add it to the tree.
     301             :   ///
     302             :   /// \param Parent The parent of this node. Only null when allocating the root.
     303             :   /// \param StartIdx The start index of this node's associated string.
     304             :   /// \param EndIdx The end index of this node's associated string.
     305             :   /// \param Edge The label on the edge leaving \p Parent to this node.
     306             :   ///
     307             :   /// \returns A pointer to the allocated internal node.
     308         226 :   SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
     309             :                                      unsigned EndIdx, unsigned Edge) {
     310             : 
     311             :     assert(StartIdx <= EndIdx && "String can't start after it ends!");
     312             :     assert(!(!Parent && StartIdx != EmptyIdx) &&
     313             :            "Non-root internal nodes must have parents!");
     314             : 
     315         452 :     unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
     316             :     SuffixTreeNode *N = new (NodeAllocator.Allocate())
     317         226 :         SuffixTreeNode(StartIdx, E, Root, Parent);
     318         226 :     if (Parent)
     319         412 :       Parent->Children[Edge] = N;
     320             : 
     321         226 :     return N;
     322             :   }
     323             : 
     324             :   /// Set the suffix indices of the leaves to the start indices of their
     325             :   /// respective suffixes. Also stores each leaf in \p LeafVector at its
     326             :   /// respective suffix index.
     327             :   ///
     328             :   /// \param[in] CurrNode The node currently being visited.
     329             :   /// \param CurrIdx The current index of the string being visited.
     330        1087 :   void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
     331             : 
     332        1087 :     bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
     333             : 
     334             :     // Store the length of the concatenation of all strings from the root to
     335             :     // this node.
     336        1087 :     if (!CurrNode.isRoot()) {
     337        1067 :       if (CurrNode.ConcatLen == 0)
     338        1067 :         CurrNode.ConcatLen = CurrNode.size();
     339             : 
     340        1067 :       if (CurrNode.Parent)
     341        1067 :         CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
     342             :     }
     343             : 
     344             :     // Traverse the tree depth-first.
     345        3241 :     for (auto &ChildPair : CurrNode.Children) {
     346             :       assert(ChildPair.second && "Node had a null child!");
     347        2134 :       setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
     348             :     }
     349             : 
     350             :     // Is this node a leaf?
     351        1087 :     if (IsLeaf) {
     352             :       // If yes, give it a suffix index and bump its parent's occurrence count.
     353         861 :       CurrNode.SuffixIdx = Str.size() - CurrIdx;
     354             :       assert(CurrNode.Parent && "CurrNode had no parent!");
     355         861 :       CurrNode.Parent->OccurrenceCount++;
     356             : 
     357             :       // Store the leaf in the leaf vector for pruning later.
     358        1722 :       LeafVector[CurrNode.SuffixIdx] = &CurrNode;
     359             :     }
     360        1087 :   }
     361             : 
     362             :   /// Construct the suffix tree for the prefix of the input ending at
     363             :   /// \p EndIdx.
     364             :   ///
     365             :   /// Used to construct the full suffix tree iteratively. At the end of each
     366             :   /// step, the constructed suffix tree is either a valid suffix tree, or a
     367             :   /// suffix tree with implicit suffixes. At the end of the final step, the
     368             :   /// suffix tree is a valid tree.
     369             :   ///
     370             :   /// \param EndIdx The end index of the current prefix in the main string.
     371             :   /// \param SuffixesToAdd The number of suffixes that must be added
     372             :   /// to complete the suffix tree at the current phase.
     373             :   ///
     374             :   /// \returns The number of suffixes that have not been added at the end of
     375             :   /// this step.
     376         861 :   unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
     377             :     SuffixTreeNode *NeedsLink = nullptr;
     378             : 
     379        1787 :     while (SuffixesToAdd > 0) {
     380             : 
     381             :       // Are we waiting to add anything other than just the last character?
     382        1228 :       if (Active.Len == 0) {
     383             :         // If not, then say the active index is the end index.
     384         761 :         Active.Idx = EndIdx;
     385             :       }
     386             : 
     387             :       assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
     388             : 
     389             :       // The first character in the current substring we're looking at.
     390        2456 :       unsigned FirstChar = Str[Active.Idx];
     391             : 
     392             :       // Have we inserted anything starting with FirstChar at the current node?
     393        1228 :       if (Active.Node->Children.count(FirstChar) == 0) {
     394             :         // If not, then we can just insert a leaf and move too the next step.
     395         655 :         insertLeaf(*Active.Node, EndIdx, FirstChar);
     396             : 
     397             :         // The active node is an internal node, and we visited it, so it must
     398             :         // need a link if it doesn't have one.
     399         655 :         if (NeedsLink) {
     400          58 :           NeedsLink->Link = Active.Node;
     401             :           NeedsLink = nullptr;
     402             :         }
     403             :       } else {
     404             :         // There's a match with FirstChar, so look for the point in the tree to
     405             :         // insert a new node.
     406        1146 :         SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
     407             : 
     408         573 :         unsigned SubstringLen = NextNode->size();
     409             : 
     410             :         // Is the current suffix we're trying to insert longer than the size of
     411             :         // the child we want to move to?
     412         638 :         if (Active.Len >= SubstringLen) {
     413             :           // If yes, then consume the characters we've seen and move to the next
     414             :           // node.
     415          65 :           Active.Idx += SubstringLen;
     416          65 :           Active.Len -= SubstringLen;
     417          65 :           Active.Node = NextNode;
     418          65 :           continue;
     419             :         }
     420             : 
     421             :         // Otherwise, the suffix we're trying to insert must be contained in the
     422             :         // next node we want to move to.
     423        1016 :         unsigned LastChar = Str[EndIdx];
     424             : 
     425             :         // Is the string we're trying to insert a substring of the next node?
     426        1016 :         if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
     427             :           // If yes, then we're done for this step. Remember our insertion point
     428             :           // and move to the next end index. At this point, we have an implicit
     429             :           // suffix tree.
     430         302 :           if (NeedsLink && !Active.Node->isRoot()) {
     431           0 :             NeedsLink->Link = Active.Node;
     432             :             NeedsLink = nullptr;
     433             :           }
     434             : 
     435         302 :           Active.Len++;
     436         302 :           break;
     437             :         }
     438             : 
     439             :         // The string we're trying to insert isn't a substring of the next node,
     440             :         // but matches up to a point. Split the node.
     441             :         //
     442             :         // For example, say we ended our search at a node n and we're trying to
     443             :         // insert ABD. Then we'll create a new node s for AB, reduce n to just
     444             :         // representing C, and insert a new leaf node l to represent d. This
     445             :         // allows us to ensure that if n was a leaf, it remains a leaf.
     446             :         //
     447             :         //   | ABC  ---split--->  | AB
     448             :         //   n                    s
     449             :         //                     C / \ D
     450             :         //                      n   l
     451             : 
     452             :         // The node s from the diagram
     453             :         SuffixTreeNode *SplitNode =
     454         206 :             insertInternalNode(Active.Node, NextNode->StartIdx,
     455         206 :                                NextNode->StartIdx + Active.Len - 1, FirstChar);
     456             : 
     457             :         // Insert the new node representing the new substring into the tree as
     458             :         // a child of the split node. This is the node l from the diagram.
     459         206 :         insertLeaf(*SplitNode, EndIdx, LastChar);
     460             : 
     461             :         // Make the old node a child of the split node and update its start
     462             :         // index. This is the node n from the diagram.
     463         206 :         NextNode->StartIdx += Active.Len;
     464         206 :         NextNode->Parent = SplitNode;
     465         618 :         SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
     466             : 
     467             :         // SplitNode is an internal node, update the suffix link.
     468         206 :         if (NeedsLink)
     469         144 :           NeedsLink->Link = SplitNode;
     470             : 
     471             :         NeedsLink = SplitNode;
     472             :       }
     473             : 
     474             :       // We've added something new to the tree, so there's one less suffix to
     475             :       // add.
     476         861 :       SuffixesToAdd--;
     477             : 
     478         861 :       if (Active.Node->isRoot()) {
     479         740 :         if (Active.Len > 0) {
     480         181 :           Active.Len--;
     481         181 :           Active.Idx = EndIdx - SuffixesToAdd + 1;
     482             :         }
     483             :       } else {
     484             :         // Start the next phase at the next smallest suffix.
     485         121 :         Active.Node = Active.Node->Link;
     486             :       }
     487             :     }
     488             : 
     489         861 :     return SuffixesToAdd;
     490             :   }
     491             : 
     492             : public:
     493             :   /// Construct a suffix tree from a sequence of unsigned integers.
     494             :   ///
     495             :   /// \param Str The string to construct the suffix tree for.
     496          60 :   SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
     497          20 :     Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
     498          20 :     Root->IsInTree = true;
     499          20 :     Active.Node = Root;
     500          60 :     LeafVector = std::vector<SuffixTreeNode *>(Str.size());
     501             : 
     502             :     // Keep track of the number of suffixes we have to add of the current
     503             :     // prefix.
     504             :     unsigned SuffixesToAdd = 0;
     505          20 :     Active.Node = Root;
     506             : 
     507             :     // Construct the suffix tree iteratively on each prefix of the string.
     508             :     // PfxEndIdx is the end index of the current prefix.
     509             :     // End is one past the last element in the string.
     510         901 :     for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
     511             :          PfxEndIdx++) {
     512         861 :       SuffixesToAdd++;
     513         861 :       LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
     514         861 :       SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
     515             :     }
     516             : 
     517             :     // Set the suffix indices of each leaf.
     518             :     assert(Root && "Root node can't be nullptr!");
     519          20 :     setSuffixIndices(*Root, 0);
     520          20 :   }
     521             : };
     522             : 
     523             : /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
     524          60 : struct InstructionMapper {
     525             : 
     526             :   /// The next available integer to assign to a \p MachineInstr that
     527             :   /// cannot be outlined.
     528             :   ///
     529             :   /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
     530             :   unsigned IllegalInstrNumber = -3;
     531             : 
     532             :   /// The next available integer to assign to a \p MachineInstr that can
     533             :   /// be outlined.
     534             :   unsigned LegalInstrNumber = 0;
     535             : 
     536             :   /// Correspondence from \p MachineInstrs to unsigned integers.
     537             :   DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
     538             :       InstructionIntegerMap;
     539             : 
     540             :   /// Corresponcence from unsigned integers to \p MachineInstrs.
     541             :   /// Inverse of \p InstructionIntegerMap.
     542             :   DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
     543             : 
     544             :   /// The vector of unsigned integers that the module is mapped to.
     545             :   std::vector<unsigned> UnsignedVec;
     546             : 
     547             :   /// Stores the location of the instruction associated with the integer
     548             :   /// at index i in \p UnsignedVec for each index i.
     549             :   std::vector<MachineBasicBlock::iterator> InstrList;
     550             : 
     551             :   /// Maps \p *It to a legal integer.
     552             :   ///
     553             :   /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
     554             :   /// \p IntegerInstructionMap, and \p LegalInstrNumber.
     555             :   ///
     556             :   /// \returns The integer that \p *It was mapped to.
     557         505 :   unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
     558             : 
     559             :     // Get the integer for this instruction or give it the current
     560             :     // LegalInstrNumber.
     561         505 :     InstrList.push_back(It);
     562             :     MachineInstr &MI = *It;
     563             :     bool WasInserted;
     564             :     DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
     565             :         ResultIt;
     566             :     std::tie(ResultIt, WasInserted) =
     567        1010 :         InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
     568         505 :     unsigned MINumber = ResultIt->second;
     569             : 
     570             :     // There was an insertion.
     571         505 :     if (WasInserted) {
     572         203 :       LegalInstrNumber++;
     573         406 :       IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
     574             :     }
     575             : 
     576         505 :     UnsignedVec.push_back(MINumber);
     577             : 
     578             :     // Make sure we don't overflow or use any integers reserved by the DenseMap.
     579         505 :     if (LegalInstrNumber >= IllegalInstrNumber)
     580           0 :       report_fatal_error("Instruction mapping overflow!");
     581             : 
     582             :     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
     583             :            "Tried to assign DenseMap tombstone or empty key to instruction.");
     584             :     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
     585             :            "Tried to assign DenseMap tombstone or empty key to instruction.");
     586             : 
     587         505 :     return MINumber;
     588             :   }
     589             : 
     590             :   /// Maps \p *It to an illegal integer.
     591             :   ///
     592             :   /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
     593             :   ///
     594             :   /// \returns The integer that \p *It was mapped to.
     595         270 :   unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
     596         270 :     unsigned MINumber = IllegalInstrNumber;
     597             : 
     598         270 :     InstrList.push_back(It);
     599         270 :     UnsignedVec.push_back(IllegalInstrNumber);
     600         270 :     IllegalInstrNumber--;
     601             : 
     602             :     assert(LegalInstrNumber < IllegalInstrNumber &&
     603             :            "Instruction mapping overflow!");
     604             : 
     605             :     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
     606             :            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
     607             : 
     608             :     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
     609             :            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
     610             : 
     611         270 :     return MINumber;
     612             :   }
     613             : 
     614             :   /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
     615             :   /// and appends it to \p UnsignedVec and \p InstrList.
     616             :   ///
     617             :   /// Two instructions are assigned the same integer if they are identical.
     618             :   /// If an instruction is deemed unsafe to outline, then it will be assigned an
     619             :   /// unique integer. The resulting mapping is placed into a suffix tree and
     620             :   /// queried for candidates.
     621             :   ///
     622             :   /// \param MBB The \p MachineBasicBlock to be translated into integers.
     623             :   /// \param TRI \p TargetRegisterInfo for the module.
     624             :   /// \param TII \p TargetInstrInfo for the module.
     625          78 :   void convertToUnsignedVec(MachineBasicBlock &MBB,
     626             :                             const TargetRegisterInfo &TRI,
     627             :                             const TargetInstrInfo &TII) {
     628          78 :     unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
     629             : 
     630         933 :     for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
     631             :          It++) {
     632             : 
     633             :       // Keep track of where this instruction is in the module.
     634         777 :       switch (TII.getOutliningType(It, Flags)) {
     635         270 :       case InstrType::Illegal:
     636         270 :         mapToIllegalUnsigned(It);
     637             :         break;
     638             : 
     639         497 :       case InstrType::Legal:
     640         497 :         mapToLegalUnsigned(It);
     641             :         break;
     642             : 
     643           8 :       case InstrType::LegalTerminator:
     644           8 :         mapToLegalUnsigned(It);
     645           8 :         InstrList.push_back(It);
     646           8 :         UnsignedVec.push_back(IllegalInstrNumber);
     647           8 :         IllegalInstrNumber--;
     648             :         break;
     649             : 
     650             :       case InstrType::Invisible:
     651             :         break;
     652             :       }
     653             :     }
     654             : 
     655             :     // After we're done every insertion, uniquely terminate this part of the
     656             :     // "string". This makes sure we won't match across basic block or function
     657             :     // boundaries since the "end" is encoded uniquely and thus appears in no
     658             :     // repeated substring.
     659         156 :     InstrList.push_back(MBB.end());
     660          78 :     UnsignedVec.push_back(IllegalInstrNumber);
     661          78 :     IllegalInstrNumber--;
     662          78 :   }
     663             : 
     664          40 :   InstructionMapper() {
     665             :     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
     666             :     // changed.
     667             :     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
     668             :            "DenseMapInfo<unsigned>'s empty key isn't -1!");
     669             :     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
     670             :            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
     671          20 :   }
     672             : };
     673             : 
     674             : /// An interprocedural pass which finds repeated sequences of
     675             : /// instructions and replaces them with calls to functions.
     676             : ///
     677             : /// Each instruction is mapped to an unsigned integer and placed in a string.
     678             : /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
     679             : /// is then repeatedly queried for repeated sequences of instructions. Each
     680             : /// non-overlapping repeated sequence is then placed in its own
     681             : /// \p MachineFunction and each instance is then replaced with a call to that
     682             : /// function.
     683          60 : struct MachineOutliner : public ModulePass {
     684             : 
     685             :   static char ID;
     686             : 
     687             :   /// Set to true if the outliner should consider functions with
     688             :   /// linkonceodr linkage.
     689             :   bool OutlineFromLinkOnceODRs = false;
     690             : 
     691             :   /// Set to true if the outliner should run on all functions in the module
     692             :   /// considered safe for outlining.
     693             :   /// Set to true by default for compatibility with llc's -run-pass option.
     694             :   /// Set when the pass is constructed in TargetPassConfig.
     695             :   bool RunOnAllFunctions = true;
     696             : 
     697             :   // Collection of IR functions created by the outliner.
     698             :   std::vector<Function *> CreatedIRFunctions;
     699             : 
     700          21 :   StringRef getPassName() const override { return "Machine Outliner"; }
     701             : 
     702          20 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     703             :     AU.addRequired<MachineModuleInfo>();
     704             :     AU.addPreserved<MachineModuleInfo>();
     705             :     AU.setPreservesAll();
     706          20 :     ModulePass::getAnalysisUsage(AU);
     707          20 :   }
     708             : 
     709          40 :   MachineOutliner() : ModulePass(ID) {
     710          20 :     initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
     711          20 :   }
     712             : 
     713             :   /// Find all repeated substrings that satisfy the outlining cost model.
     714             :   ///
     715             :   /// If a substring appears at least twice, then it must be represented by
     716             :   /// an internal node which appears in at least two suffixes. Each suffix is
     717             :   /// represented by a leaf node. To do this, we visit each internal node in
     718             :   /// the tree, using the leaf children of each internal node. If an internal
     719             :   /// node represents a beneficial substring, then we use each of its leaf
     720             :   /// children to find the locations of its substring.
     721             :   ///
     722             :   /// \param ST A suffix tree to query.
     723             :   /// \param TII TargetInstrInfo for the target.
     724             :   /// \param Mapper Contains outlining mapping information.
     725             :   /// \param[out] CandidateList Filled with candidates representing each
     726             :   /// beneficial substring.
     727             :   /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
     728             :   /// type of candidate.
     729             :   ///
     730             :   /// \returns The length of the longest candidate found.
     731             :   unsigned
     732             :   findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
     733             :                  InstructionMapper &Mapper,
     734             :                  std::vector<std::shared_ptr<Candidate>> &CandidateList,
     735             :                  std::vector<OutlinedFunction> &FunctionList);
     736             : 
     737             :   /// Replace the sequences of instructions represented by the
     738             :   /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
     739             :   /// described in \p FunctionList.
     740             :   ///
     741             :   /// \param M The module we are outlining from.
     742             :   /// \param CandidateList A list of candidates to be outlined.
     743             :   /// \param FunctionList A list of functions to be inserted into the module.
     744             :   /// \param Mapper Contains the instruction mappings for the module.
     745             :   bool outline(Module &M,
     746             :                const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
     747             :                std::vector<OutlinedFunction> &FunctionList,
     748             :                InstructionMapper &Mapper);
     749             : 
     750             :   /// Creates a function for \p OF and inserts it into the module.
     751             :   MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
     752             :                                           InstructionMapper &Mapper);
     753             : 
     754             :   /// Find potential outlining candidates and store them in \p CandidateList.
     755             :   ///
     756             :   /// For each type of potential candidate, also build an \p OutlinedFunction
     757             :   /// struct containing the information to build the function for that
     758             :   /// candidate.
     759             :   ///
     760             :   /// \param[out] CandidateList Filled with outlining candidates for the module.
     761             :   /// \param[out] FunctionList Filled with functions corresponding to each type
     762             :   /// of \p Candidate.
     763             :   /// \param ST The suffix tree for the module.
     764             :   /// \param TII TargetInstrInfo for the module.
     765             :   ///
     766             :   /// \returns The length of the longest candidate found. 0 if there are none.
     767             :   unsigned
     768             :   buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
     769             :                      std::vector<OutlinedFunction> &FunctionList,
     770             :                      SuffixTree &ST, InstructionMapper &Mapper,
     771             :                      const TargetInstrInfo &TII);
     772             : 
     773             :   /// Helper function for pruneOverlaps.
     774             :   /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
     775             :   void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
     776             : 
     777             :   /// Remove any overlapping candidates that weren't handled by the
     778             :   /// suffix tree's pruning method.
     779             :   ///
     780             :   /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
     781             :   /// If a short candidate is chosen for outlining, then a longer candidate
     782             :   /// which has that short candidate as a suffix is chosen, the tree's pruning
     783             :   /// method will not find it. Thus, we need to prune before outlining as well.
     784             :   ///
     785             :   /// \param[in,out] CandidateList A list of outlining candidates.
     786             :   /// \param[in,out] FunctionList A list of functions to be outlined.
     787             :   /// \param Mapper Contains instruction mapping info for outlining.
     788             :   /// \param MaxCandidateLen The length of the longest candidate.
     789             :   /// \param TII TargetInstrInfo for the module.
     790             :   void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
     791             :                      std::vector<OutlinedFunction> &FunctionList,
     792             :                      InstructionMapper &Mapper, unsigned MaxCandidateLen,
     793             :                      const TargetInstrInfo &TII);
     794             : 
     795             :   /// Construct a suffix tree on the instructions in \p M and outline repeated
     796             :   /// strings from that tree.
     797             :   bool runOnModule(Module &M) override;
     798             : 
     799             :   /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
     800             :   /// function for remark emission.
     801          23 :   DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
     802             :     DISubprogram *SP;
     803          23 :     for (const std::shared_ptr<Candidate> &C : OF.Candidates)
     804          96 :       if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
     805             :         return SP;
     806             :     return nullptr;
     807             :   }
     808             : };
     809             : 
     810             : } // Anonymous namespace.
     811             : 
     812             : char MachineOutliner::ID = 0;
     813             : 
     814             : namespace llvm {
     815          15 : ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
     816          15 :   MachineOutliner *OL = new MachineOutliner();
     817          15 :   OL->RunOnAllFunctions = RunOnAllFunctions;
     818          15 :   return OL;
     819             : }
     820             : 
     821             : } // namespace llvm
     822             : 
     823      146650 : INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
     824             :                 false)
     825             : 
     826          20 : unsigned MachineOutliner::findCandidates(
     827             :     SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
     828             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
     829             :     std::vector<OutlinedFunction> &FunctionList) {
     830          20 :   CandidateList.clear();
     831             :   FunctionList.clear();
     832             :   unsigned MaxLen = 0;
     833             : 
     834             :   // FIXME: Visit internal nodes instead of leaves.
     835         881 :   for (SuffixTreeNode *Leaf : ST.LeafVector) {
     836             :     assert(Leaf && "Leaves in LeafVector cannot be null!");
     837         861 :     if (!Leaf->IsInTree)
     838         793 :       continue;
     839             : 
     840             :     assert(Leaf->Parent && "All leaves must have parents!");
     841         676 :     SuffixTreeNode &Parent = *(Leaf->Parent);
     842             : 
     843             :     // If it doesn't appear enough, or we already outlined from it, skip it.
     844         676 :     if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
     845             :       continue;
     846             : 
     847             :     // Figure out if this candidate is beneficial.
     848         540 :     unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
     849             : 
     850             :     // Too short to be beneficial; skip it.
     851             :     // FIXME: This isn't necessarily true for, say, X86. If we factor in
     852             :     // instruction lengths we need more information than this.
     853         270 :     if (StringLen < 2)
     854             :       continue;
     855             : 
     856             :     // If this is a beneficial class of candidate, then every one is stored in
     857             :     // this vector.
     858          68 :     std::vector<Candidate> CandidatesForRepeatedSeq;
     859             : 
     860             :     // Figure out the call overhead for each instance of the sequence.
     861         587 :     for (auto &ChildPair : Parent.Children) {
     862         321 :       SuffixTreeNode *M = ChildPair.second;
     863             : 
     864         321 :       if (M && M->IsInTree && M->isLeaf()) {
     865             :         // Never visit this leaf again.
     866         318 :         M->IsInTree = false;
     867         318 :         unsigned StartIdx = M->SuffixIdx;
     868         318 :         unsigned EndIdx = StartIdx + StringLen - 1;
     869             : 
     870             :         // Trick: Discard some candidates that would be incompatible with the
     871             :         // ones we've already found for this sequence. This will save us some
     872             :         // work in candidate selection.
     873             :         //
     874             :         // If two candidates overlap, then we can't outline them both. This
     875             :         // happens when we have candidates that look like, say
     876             :         //
     877             :         // AA (where each "A" is an instruction).
     878             :         //
     879             :         // We might have some portion of the module that looks like this:
     880             :         // AAAAAA (6 A's) 
     881             :         //
     882             :         // In this case, there are 5 different copies of "AA" in this range, but
     883             :         // at most 3 can be outlined. If only outlining 3 of these is going to
     884             :         // be unbeneficial, then we ought to not bother.
     885             :         //
     886             :         // Note that two things DON'T overlap when they look like this:
     887             :         // start1...end1 .... start2...end2
     888             :         // That is, one must either
     889             :         // * End before the other starts
     890             :         // * Start after the other ends
     891         318 :         if (std::all_of(CandidatesForRepeatedSeq.begin(),
     892             :                         CandidatesForRepeatedSeq.end(),
     893             :                         [&StartIdx, &EndIdx](const Candidate &C) {
     894         484 :                           return (EndIdx < C.getStartIdx() ||
     895         161 :                                   StartIdx > C.getEndIdx());
     896             :                         })) {
     897             :           // It doesn't overlap with anything, so we can outline it.
     898             :           // Each sequence is over [StartIt, EndIt].
     899             :           // Save the candidate and its location.
     900             : 
     901         636 :           MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
     902         636 :           MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
     903             : 
     904         318 :           CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
     905         636 :                                                 EndIt, StartIt->getParent(),
     906         954 :                                                 FunctionList.size());
     907             :         }
     908             :       }
     909             :     }
     910             : 
     911             :     // We've found something we might want to outline.
     912             :     // Create an OutlinedFunction to store it and check if it'd be beneficial
     913             :     // to outline.
     914             :     TargetCostInfo TCI =
     915         133 :         TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq);
     916             :     std::vector<unsigned> Seq;
     917         818 :     for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
     918        1370 :       Seq.push_back(ST.Str[i]);
     919         266 :     OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
     920         334 :                         Seq, TCI);
     921             :     unsigned Benefit = OF.getBenefit();
     922             : 
     923             :     // Is it better to outline this candidate than not?
     924          90 :     if (Benefit < 1) {
     925             :       // Outlining this candidate would take more instructions than not
     926             :       // outlining.
     927             :       // Emit a remark explaining why we didn't outline this candidate.
     928             :       Candidate &C = CandidatesForRepeatedSeq.front();
     929          65 :       MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
     930          79 :       MORE.emit([&]() {
     931             :         MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
     932          42 :                                           C.front()->getDebugLoc(), C.getMBB());
     933          42 :         R << "Did not outline " << NV("Length", StringLen) << " instructions"
     934          70 :           << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
     935             :           << " locations."
     936             :           << " Bytes from outlining all occurrences ("
     937          56 :           << NV("OutliningCost", OF.getOutliningCost()) << ")"
     938             :           << " >= Unoutlined instruction bytes ("
     939          42 :           << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
     940          56 :           << " (Also found at: ";
     941             : 
     942             :         // Tell the user the other places the candidate was found.
     943          42 :         for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
     944          56 :           R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
     945          28 :                   CandidatesForRepeatedSeq[i].front()->getDebugLoc());
     946          14 :           if (i != e - 1)
     947             :             R << ", ";
     948             :         }
     949             : 
     950             :         R << ")";
     951          14 :         return R;
     952             :       });
     953             : 
     954             :       // Move to the next candidate.
     955          65 :       continue;
     956             :     }
     957             : 
     958          68 :     if (StringLen > MaxLen)
     959             :       MaxLen = StringLen;
     960             : 
     961             :     // At this point, the candidate class is seen as beneficial. Set their
     962             :     // benefit values and save them in the candidate list.
     963          68 :     std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
     964         234 :     for (Candidate &C : CandidatesForRepeatedSeq) {
     965         166 :       C.Benefit = Benefit;
     966         166 :       C.TCI = TCI;
     967             :       std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
     968         166 :       CandidateList.push_back(Cptr);
     969         166 :       CandidatesForFn.push_back(Cptr);
     970             :     }
     971             : 
     972          68 :     FunctionList.push_back(OF);
     973          68 :     FunctionList.back().Candidates = CandidatesForFn;
     974             : 
     975             :     // Move to the next function.
     976          68 :     Parent.IsInTree = false;
     977             :   }
     978             : 
     979          20 :   return MaxLen;
     980             : }
     981             : 
     982             : // Remove C from the candidate space, and update its OutlinedFunction.
     983             : void MachineOutliner::prune(Candidate &C,
     984             :                             std::vector<OutlinedFunction> &FunctionList) {
     985             :   // Get the OutlinedFunction associated with this Candidate.
     986          55 :   OutlinedFunction &F = FunctionList[C.FunctionIdx];
     987             : 
     988             :   // Update C's associated function's occurrence count.
     989             :   F.decrement();
     990             : 
     991             :   // Remove C from the CandidateList.
     992         110 :   C.InCandidateList = false;
     993             : 
     994             :   LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
     995             :              dbgs() << "--- Num fns left for candidate: "
     996             :                     << F.getOccurrenceCount() << "\n";
     997             :              dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
     998             :                     << "\n";);
     999             : }
    1000             : 
    1001          20 : void MachineOutliner::pruneOverlaps(
    1002             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
    1003             :     std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
    1004             :     unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
    1005             : 
    1006             :   // Return true if this candidate became unbeneficial for outlining in a
    1007             :   // previous step.
    1008         469 :   auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
    1009             : 
    1010             :     // Check if the candidate was removed in a previous step.
    1011         245 :     if (!C.InCandidateList)
    1012             :       return true;
    1013             : 
    1014             :     // C must be alive. Check if we should remove it.
    1015         623 :     if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
    1016             :       prune(C, FunctionList);
    1017             :       return true;
    1018             :     }
    1019             : 
    1020             :     // C is in the list, and F is still beneficial.
    1021             :     return false;
    1022          20 :   };
    1023             : 
    1024             :   // TODO: Experiment with interval trees or other interval-checking structures
    1025             :   // to lower the time complexity of this function.
    1026             :   // TODO: Can we do better than the simple greedy choice?
    1027             :   // Check for overlaps in the range.
    1028             :   // This is O(MaxCandidateLen * CandidateList.size()).
    1029         186 :   for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
    1030             :        It++) {
    1031             :     Candidate &C1 = **It;
    1032             : 
    1033             :     // If C1 was already pruned, or its function is no longer beneficial for
    1034             :     // outlining, move to the next candidate.
    1035         166 :     if (ShouldSkipCandidate(C1))
    1036             :       continue;
    1037             : 
    1038             :     // The minimum start index of any candidate that could overlap with this
    1039             :     // one.
    1040             :     unsigned FarthestPossibleIdx = 0;
    1041             : 
    1042             :     // Either the index is 0, or it's at most MaxCandidateLen indices away.
    1043         111 :     if (C1.getStartIdx() > MaxCandidateLen)
    1044          98 :       FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
    1045             : 
    1046             :     // Compare against the candidates in the list that start at most
    1047             :     // FarthestPossibleIdx indices away from C1. There are at most
    1048             :     // MaxCandidateLen of these.
    1049         135 :     for (auto Sit = It + 1; Sit != Et; Sit++) {
    1050             :       Candidate &C2 = **Sit;
    1051             : 
    1052             :       // Is this candidate too far away to overlap?
    1053         116 :       if (C2.getStartIdx() < FarthestPossibleIdx)
    1054             :         break;
    1055             : 
    1056             :       // If C2 was already pruned, or its function is no longer beneficial for
    1057             :       // outlining, move to the next candidate.
    1058          79 :       if (ShouldSkipCandidate(C2))
    1059             :         continue;
    1060             : 
    1061             :       // Do C1 and C2 overlap?
    1062             :       //
    1063             :       // Not overlapping:
    1064             :       // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
    1065             :       //
    1066             :       // We sorted our candidate list so C2Start <= C1Start. We know that
    1067             :       // C2End > C2Start since each candidate has length >= 2. Therefore, all we
    1068             :       // have to check is C2End < C2Start to see if we overlap.
    1069         116 :       if (C2.getEndIdx() < C1.getStartIdx())
    1070             :         continue;
    1071             : 
    1072             :       // C1 and C2 overlap.
    1073             :       // We need to choose the better of the two.
    1074             :       //
    1075             :       // Approximate this by picking the one which would have saved us the
    1076             :       // most instructions before any pruning.
    1077             : 
    1078             :       // Is C2 a better candidate?
    1079          55 :       if (C2.Benefit > C1.Benefit) {
    1080             :         // Yes, so prune C1. Since C1 is dead, we don't have to compare it
    1081             :         // against anything anymore, so break.
    1082          55 :         prune(C1, FunctionList);
    1083             :         break;
    1084             :       }
    1085             : 
    1086             :       // Prune C2 and move on to the next candidate.
    1087           0 :       prune(C2, FunctionList);
    1088             :     }
    1089             :   }
    1090          20 : }
    1091             : 
    1092          20 : unsigned MachineOutliner::buildCandidateList(
    1093             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
    1094             :     std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
    1095             :     InstructionMapper &Mapper, const TargetInstrInfo &TII) {
    1096             : 
    1097             :   std::vector<unsigned> CandidateSequence; // Current outlining candidate.
    1098             :   unsigned MaxCandidateLen = 0;            // Length of the longest candidate.
    1099             : 
    1100          20 :   MaxCandidateLen =
    1101             :       findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
    1102             : 
    1103             :   // Sort the candidates in decending order. This will simplify the outlining
    1104             :   // process when we have to remove the candidates from the mapping by
    1105             :   // allowing us to cut them out without keeping track of an offset.
    1106             :   std::stable_sort(
    1107             :       CandidateList.begin(), CandidateList.end(),
    1108             :       [](const std::shared_ptr<Candidate> &LHS,
    1109             :          const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
    1110             : 
    1111          20 :   return MaxCandidateLen;
    1112             : }
    1113             : 
    1114             : MachineFunction *
    1115          23 : MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
    1116             :                                         InstructionMapper &Mapper) {
    1117             : 
    1118             :   // Create the function name. This should be unique. For now, just hash the
    1119             :   // module name and include it in the function name plus the number of this
    1120             :   // function.
    1121          46 :   std::ostringstream NameStream;
    1122          23 :   NameStream << "OUTLINED_FUNCTION_" << OF.Name;
    1123             : 
    1124             :   // Create the function using an IR-level function.
    1125          23 :   LLVMContext &C = M.getContext();
    1126          23 :   Function *F = dyn_cast<Function>(
    1127          46 :       M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
    1128             :   assert(F && "Function was null!");
    1129             : 
    1130             :   // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
    1131             :   // which gives us better results when we outline from linkonceodr functions.
    1132          23 :   F->setLinkage(GlobalValue::InternalLinkage);
    1133             :   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    1134             : 
    1135             :   // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
    1136             :   // necessary.
    1137             : 
    1138             :   // Set optsize/minsize, so we don't insert padding between outlined
    1139             :   // functions.
    1140             :   F->addFnAttr(Attribute::OptimizeForSize);
    1141          23 :   F->addFnAttr(Attribute::MinSize);
    1142             : 
    1143             :   // Save F so that we can add debug info later if we need to.
    1144          23 :   CreatedIRFunctions.push_back(F);
    1145             : 
    1146          46 :   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
    1147             :   IRBuilder<> Builder(EntryBB);
    1148          23 :   Builder.CreateRetVoid();
    1149             : 
    1150          23 :   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
    1151          23 :   MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
    1152          23 :   MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
    1153          23 :   const TargetSubtargetInfo &STI = MF.getSubtarget();
    1154          23 :   const TargetInstrInfo &TII = *STI.getInstrInfo();
    1155             : 
    1156             :   // Insert the new function into the module.
    1157             :   MF.insert(MF.begin(), &MBB);
    1158             : 
    1159             :   // Copy over the instructions for the function using the integer mappings in
    1160             :   // its sequence.
    1161          23 :   for (unsigned Str : OF.Sequence) {
    1162             :     MachineInstr *NewMI =
    1163         151 :         MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
    1164             :     NewMI->dropMemRefs();
    1165             : 
    1166             :     // Don't keep debug information for outlined instructions.
    1167         302 :     NewMI->setDebugLoc(DebugLoc());
    1168             :     MBB.insert(MBB.end(), NewMI);
    1169             :   }
    1170             : 
    1171          23 :   TII.buildOutlinedFrame(MBB, MF, OF.TCI);
    1172             : 
    1173             :   // If there's a DISubprogram associated with this outlined function, then
    1174             :   // emit debug info for the outlined function.
    1175          23 :   if (DISubprogram *SP = getSubprogramOrNull(OF)) {
    1176             :     // We have a DISubprogram. Get its DICompileUnit.
    1177             :     DICompileUnit *CU = SP->getUnit();
    1178          12 :     DIBuilder DB(M, true, CU);
    1179             :     DIFile *Unit = SP->getFile();
    1180             :     Mangler Mg;
    1181             : 
    1182             :     // Walk over each IR function we created in the outliner and create
    1183             :     // DISubprograms for each function.
    1184          13 :     for (Function *F : CreatedIRFunctions) {
    1185             :       // Get the mangled name of the function for the linkage name.
    1186             :       std::string Dummy;
    1187           7 :       llvm::raw_string_ostream MangledNameStream(Dummy);
    1188           7 :       Mg.getNameWithPrefix(MangledNameStream, F, false);
    1189             : 
    1190          21 :       DISubprogram *SP = DB.createFunction(
    1191             :           Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
    1192             :           Unit /* File */,
    1193             :           0 /* Line 0 is reserved for compiler-generated code. */,
    1194             :           DB.createSubroutineType(
    1195             :               DB.getOrCreateTypeArray(None)), /* void type */
    1196             :           false, true, 0, /* Line 0 is reserved for compiler-generated code. */
    1197             :           DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
    1198           7 :           true /* Outlined code is optimized code by definition. */);
    1199             : 
    1200             :       // Don't add any new variables to the subprogram.
    1201           7 :       DB.finalizeSubprogram(SP);
    1202             : 
    1203             :       // Attach subprogram to the function.
    1204           7 :       F->setSubprogram(SP);
    1205             :     }
    1206             : 
    1207             :     // We're done with the DIBuilder.
    1208           6 :     DB.finalize();
    1209             :   }
    1210             : 
    1211             :   // Outlined functions shouldn't preserve liveness.
    1212             :   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
    1213          23 :   MF.getRegInfo().freezeReservedRegs(MF);
    1214          23 :   return &MF;
    1215             : }
    1216             : 
    1217          20 : bool MachineOutliner::outline(
    1218             :     Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
    1219             :     std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
    1220             : 
    1221             :   bool OutlinedSomething = false;
    1222             :   // Replace the candidates with calls to their respective outlined functions.
    1223         372 :   for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
    1224             :     Candidate &C = *Cptr;
    1225             :     // Was the candidate removed during pruneOverlaps?
    1226         166 :     if (!C.InCandidateList)
    1227         220 :       continue;
    1228             : 
    1229             :     // If not, then look at its OutlinedFunction.
    1230          56 :     OutlinedFunction &OF = FunctionList[C.FunctionIdx];
    1231             : 
    1232             :     // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
    1233          56 :     if (OF.getBenefit() < 1)
    1234           0 :       continue;
    1235             : 
    1236             :     // Does this candidate have a function yet?
    1237          56 :     if (!OF.MF) {
    1238          23 :       OF.MF = createOutlinedFunction(M, OF, Mapper);
    1239             :       MachineBasicBlock *MBB = &*OF.MF->begin();
    1240             : 
    1241             :       // Output a remark telling the user that an outlined function was created,
    1242             :       // and explaining where it came from.
    1243             :       MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
    1244             :       MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
    1245          69 :                                   MBB->findDebugLoc(MBB->begin()), MBB);
    1246          46 :       R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
    1247             :         << " bytes by "
    1248          69 :         << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
    1249          69 :         << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
    1250             :         << " locations. "
    1251          69 :         << "(Found at: ";
    1252             : 
    1253             :       // Tell the user the other places the candidate was found.
    1254         158 :       for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
    1255             : 
    1256             :         // Skip over things that were pruned.
    1257         112 :         if (!OF.Candidates[i]->InCandidateList)
    1258           0 :           continue;
    1259             : 
    1260         112 :         R << NV(
    1261         112 :             (Twine("StartLoc") + Twine(i)).str(),
    1262             :             OF.Candidates[i]->front()->getDebugLoc());
    1263          56 :         if (i != e - 1)
    1264             :           R << ", ";
    1265             :       }
    1266             : 
    1267             :       R << ")";
    1268             : 
    1269          23 :       MORE.emit(R);
    1270             :       FunctionsCreated++;
    1271             :     }
    1272             : 
    1273          56 :     MachineFunction *MF = OF.MF;
    1274          56 :     MachineBasicBlock &MBB = *C.getMBB();
    1275          56 :     MachineBasicBlock::iterator StartIt = C.front();
    1276          56 :     MachineBasicBlock::iterator EndIt = C.back();
    1277             :     assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
    1278             :     assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
    1279             : 
    1280          56 :     const TargetSubtargetInfo &STI = MF->getSubtarget();
    1281          56 :     const TargetInstrInfo &TII = *STI.getInstrInfo();
    1282             : 
    1283             :     // Insert a call to the new function and erase the old sequence.
    1284          56 :     auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C.TCI);
    1285             : 
    1286             :     // If the caller tracks liveness, then we need to make sure that anything
    1287             :     // we outline doesn't break liveness assumptions.
    1288             :     // The outlined functions themselves currently don't track liveness, but
    1289             :     // we should make sure that the ranges we yank things out of aren't
    1290             :     // wrong.
    1291         112 :     if (MBB.getParent()->getProperties().hasProperty(
    1292             :             MachineFunctionProperties::Property::TracksLiveness)) {
    1293             :       // Helper lambda for adding implicit def operands to the call instruction.
    1294         630 :       auto CopyDefs = [&CallInst](MachineInstr &MI) {
    1295        2897 :         for (MachineOperand &MOP : MI.operands()) {
    1296             :           // Skip over anything that isn't a register.
    1297        1264 :           if (!MOP.isReg())
    1298         479 :             continue;
    1299             : 
    1300             :           // If it's a def, add it to the call instruction.
    1301         785 :           if (MOP.isDef())
    1302         261 :             CallInst->addOperand(
    1303         261 :                 MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
    1304         261 :                                           true /* isImp = true */));
    1305             :         }
    1306         425 :       };
    1307             : 
    1308             :       // Copy over the defs in the outlined range.
    1309             :       // First inst in outlined range <-- Anything that's defined in this
    1310             :       // ...                           .. range has to be added as an implicit
    1311             :       // Last inst in outlined range  <-- def to the call instruction.
    1312          56 :       std::for_each(CallInst, EndIt, CopyDefs);
    1313             :     }
    1314             : 
    1315             :     // Erase from the point after where the call was inserted up to, and
    1316             :     // including, the final instruction in the sequence.
    1317             :     // Erase needs one past the end, so we need std::next there too.
    1318             :     MBB.erase(std::next(StartIt), std::next(EndIt));
    1319             :     OutlinedSomething = true;
    1320             : 
    1321             :     // Statistics.
    1322             :     NumOutlined++;
    1323             :   }
    1324             : 
    1325             :   LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
    1326             : 
    1327          20 :   return OutlinedSomething;
    1328             : }
    1329             : 
    1330          20 : bool MachineOutliner::runOnModule(Module &M) {
    1331             :   // Check if there's anything in the module. If it's empty, then there's
    1332             :   // nothing to outline.
    1333          20 :   if (M.empty())
    1334             :     return false;
    1335             : 
    1336          20 :   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
    1337             :   const TargetSubtargetInfo &STI =
    1338          20 :       MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
    1339          20 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    1340          20 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    1341             : 
    1342             :   // If the user passed -enable-machine-outliner=always or
    1343             :   // -enable-machine-outliner, the pass will run on all functions in the module.
    1344             :   // Otherwise, if the target supports default outlining, it will run on all
    1345             :   // functions deemed by the target to be worth outlining from by default. Tell
    1346             :   // the user how the outliner is running.
    1347             :   LLVM_DEBUG(
    1348             :     dbgs() << "Machine Outliner: Running on ";
    1349             :     if (RunOnAllFunctions)
    1350             :       dbgs() << "all functions";
    1351             :     else
    1352             :       dbgs() << "target-default functions";
    1353             :     dbgs() << "\n"
    1354             :   );
    1355             : 
    1356             :   // If the user specifies that they want to outline from linkonceodrs, set
    1357             :   // it here.
    1358          20 :   OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
    1359             : 
    1360          40 :   InstructionMapper Mapper;
    1361             : 
    1362             :   // Build instruction mappings for each function in the module. Start by
    1363             :   // iterating over each Function in M.
    1364         107 :   for (Function &F : M) {
    1365             : 
    1366             :     // If there's nothing in F, then there's no reason to try and outline from
    1367             :     // it.
    1368          87 :     if (F.empty())
    1369          22 :       continue;
    1370             : 
    1371             :     // There's something in F. Check if it has a MachineFunction associated with
    1372             :     // it.
    1373          65 :     MachineFunction *MF = MMI.getMachineFunction(F);
    1374             : 
    1375             :     // If it doesn't, then there's nothing to outline from. Move to the next
    1376             :     // Function.
    1377          65 :     if (!MF)
    1378           0 :       continue;
    1379             : 
    1380          65 :     if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
    1381           0 :       continue;
    1382             : 
    1383             :     // We have a MachineFunction. Ask the target if it's suitable for outlining.
    1384             :     // If it isn't, then move on to the next Function in the module.
    1385          65 :     if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
    1386           8 :       continue;
    1387             : 
    1388             :     // We have a function suitable for outlining. Iterate over every
    1389             :     // MachineBasicBlock in MF and try to map its instructions to a list of
    1390             :     // unsigned integers.
    1391         135 :     for (MachineBasicBlock &MBB : *MF) {
    1392             :       // If there isn't anything in MBB, then there's no point in outlining from
    1393             :       // it.
    1394          78 :       if (MBB.empty())
    1395           0 :         continue;
    1396             : 
    1397             :       // Check if MBB could be the target of an indirect branch. If it is, then
    1398             :       // we don't want to outline from it.
    1399          78 :       if (MBB.hasAddressTaken())
    1400           0 :         continue;
    1401             : 
    1402             :       // MBB is suitable for outlining. Map it to a list of unsigneds.
    1403          78 :       Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
    1404             :     }
    1405             :   }
    1406             : 
    1407             :   // Construct a suffix tree, use it to find candidates, and then outline them.
    1408          40 :   SuffixTree ST(Mapper.UnsignedVec);
    1409          20 :   std::vector<std::shared_ptr<Candidate>> CandidateList;
    1410          20 :   std::vector<OutlinedFunction> FunctionList;
    1411             : 
    1412             :   // Find all of the outlining candidates.
    1413             :   unsigned MaxCandidateLen =
    1414          20 :       buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
    1415             : 
    1416             :   // Remove candidates that overlap with other candidates.
    1417          20 :   pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
    1418             : 
    1419             :   // Outline each of the candidates and return true if something was outlined.
    1420          20 :   bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
    1421             : 
    1422             :   return OutlinedSomething;
    1423      299229 : }

Generated by: LCOV version 1.13