LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineOutliner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 304 312 97.4 %
Date: 2018-06-17 00:07:59 Functions: 31 31 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : ///
      10             : /// \file
      11             : /// Replaces repeated sequences of instructions with function calls.
      12             : ///
      13             : /// This works by placing every instruction from every basic block in a
      14             : /// suffix tree, and repeatedly querying that tree for repeated sequences of
      15             : /// instructions. If a sequence of instructions appears often, then it ought
      16             : /// to be beneficial to pull out into a function.
      17             : ///
      18             : /// The MachineOutliner communicates with a given target using hooks defined in
      19             : /// TargetInstrInfo.h. The target supplies the outliner with information on how
      20             : /// a specific sequence of instructions should be outlined. This information
      21             : /// is used to deduce the number of instructions necessary to
      22             : ///
      23             : /// * Create an outlined function
      24             : /// * Call that outlined function
      25             : ///
      26             : /// Targets must implement
      27             : ///   * getOutliningCandidateInfo
      28             : ///   * insertOutlinerEpilogue
      29             : ///   * insertOutlinedCall
      30             : ///   * insertOutlinerPrologue
      31             : ///   * isFunctionSafeToOutlineFrom
      32             : ///
      33             : /// in order to make use of the MachineOutliner.
      34             : ///
      35             : /// This was originally presented at the 2016 LLVM Developers' Meeting in the
      36             : /// talk "Reducing Code Size Using Outlining". For a high-level overview of
      37             : /// how this pass works, the talk is available on YouTube at
      38             : ///
      39             : /// https://www.youtube.com/watch?v=yorld-WSOeU
      40             : ///
      41             : /// The slides for the talk are available at
      42             : ///
      43             : /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
      44             : ///
      45             : /// The talk provides an overview of how the outliner finds candidates and
      46             : /// ultimately outlines them. It describes how the main data structure for this
      47             : /// pass, the suffix tree, is queried and purged for candidates. It also gives
      48             : /// a simplified suffix tree construction algorithm for suffix trees based off
      49             : /// of the algorithm actually used here, Ukkonen's algorithm.
      50             : ///
      51             : /// For the original RFC for this pass, please see
      52             : ///
      53             : /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
      54             : ///
      55             : /// For more information on the suffix tree data structure, please see
      56             : /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
      57             : ///
      58             : //===----------------------------------------------------------------------===//
      59             : #include "llvm/CodeGen/MachineOutliner.h"
      60             : #include "llvm/ADT/DenseMap.h"
      61             : #include "llvm/ADT/Statistic.h"
      62             : #include "llvm/ADT/Twine.h"
      63             : #include "llvm/CodeGen/MachineFunction.h"
      64             : #include "llvm/CodeGen/MachineModuleInfo.h"
      65             : #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
      66             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      67             : #include "llvm/CodeGen/Passes.h"
      68             : #include "llvm/CodeGen/TargetInstrInfo.h"
      69             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      70             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      71             : #include "llvm/IR/DIBuilder.h"
      72             : #include "llvm/IR/IRBuilder.h"
      73             : #include "llvm/IR/Mangler.h"
      74             : #include "llvm/Support/Allocator.h"
      75             : #include "llvm/Support/CommandLine.h"
      76             : #include "llvm/Support/Debug.h"
      77             : #include "llvm/Support/raw_ostream.h"
      78             : #include <functional>
      79             : #include <map>
      80             : #include <sstream>
      81             : #include <tuple>
      82             : #include <vector>
      83             : 
      84             : #define DEBUG_TYPE "machine-outliner"
      85             : 
      86             : using namespace llvm;
      87             : using namespace ore;
      88             : using namespace outliner;
      89             : 
      90             : STATISTIC(NumOutlined, "Number of candidates outlined");
      91             : STATISTIC(FunctionsCreated, "Number of functions created");
      92             : 
      93             : // Set to true if the user wants the outliner to run on linkonceodr linkage
      94             : // functions. This is false by default because the linker can dedupe linkonceodr
      95             : // functions. Since the outliner is confined to a single module (modulo LTO),
      96             : // this is off by default. It should, however, be the default behaviour in
      97             : // LTO.
      98      101169 : static cl::opt<bool> EnableLinkOnceODROutlining(
      99             :     "enable-linkonceodr-outlining",
     100             :     cl::Hidden,
     101      101169 :     cl::desc("Enable the machine outliner on linkonceodr functions"),
     102      303507 :     cl::init(false));
     103             : 
     104             : namespace {
     105             : 
     106             : /// Represents an undefined index in the suffix tree.
     107             : const unsigned EmptyIdx = -1;
     108             : 
     109             : /// A node in a suffix tree which represents a substring or suffix.
     110             : ///
     111             : /// Each node has either no children or at least two children, with the root
     112             : /// being a exception in the empty tree.
     113             : ///
     114             : /// Children are represented as a map between unsigned integers and nodes. If
     115             : /// a node N has a child M on unsigned integer k, then the mapping represented
     116             : /// by N is a proper prefix of the mapping represented by M. Note that this,
     117             : /// although similar to a trie is somewhat different: each node stores a full
     118             : /// substring of the full mapping rather than a single character state.
     119             : ///
     120             : /// Each internal node contains a pointer to the internal node representing
     121             : /// the same string, but with the first character chopped off. This is stored
     122             : /// in \p Link. Each leaf node stores the start index of its respective
     123             : /// suffix in \p SuffixIdx.
     124         978 : struct SuffixTreeNode {
     125             : 
     126             :   /// The children of this node.
     127             :   ///
     128             :   /// A child existing on an unsigned integer implies that from the mapping
     129             :   /// represented by the current node, there is a way to reach another
     130             :   /// mapping by tacking that character on the end of the current string.
     131             :   DenseMap<unsigned, SuffixTreeNode *> Children;
     132             : 
     133             :   /// A flag set to false if the node has been pruned from the tree.
     134             :   bool IsInTree = true;
     135             : 
     136             :   /// The start index of this node's substring in the main string.
     137             :   unsigned StartIdx = EmptyIdx;
     138             : 
     139             :   /// The end index of this node's substring in the main string.
     140             :   ///
     141             :   /// Every leaf node must have its \p EndIdx incremented at the end of every
     142             :   /// step in the construction algorithm. To avoid having to update O(N)
     143             :   /// nodes individually at the end of every step, the end index is stored
     144             :   /// as a pointer.
     145             :   unsigned *EndIdx = nullptr;
     146             : 
     147             :   /// For leaves, the start index of the suffix represented by this node.
     148             :   ///
     149             :   /// For all other nodes, this is ignored.
     150             :   unsigned SuffixIdx = EmptyIdx;
     151             : 
     152             :   /// For internal nodes, a pointer to the internal node representing
     153             :   /// the same sequence with the first character chopped off.
     154             :   ///
     155             :   /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
     156             :   /// Ukkonen's algorithm does to achieve linear-time construction is
     157             :   /// keep track of which node the next insert should be at. This makes each
     158             :   /// insert O(1), and there are a total of O(N) inserts. The suffix link
     159             :   /// helps with inserting children of internal nodes.
     160             :   ///
     161             :   /// Say we add a child to an internal node with associated mapping S. The
     162             :   /// next insertion must be at the node representing S - its first character.
     163             :   /// This is given by the way that we iteratively build the tree in Ukkonen's
     164             :   /// algorithm. The main idea is to look at the suffixes of each prefix in the
     165             :   /// string, starting with the longest suffix of the prefix, and ending with
     166             :   /// the shortest. Therefore, if we keep pointers between such nodes, we can
     167             :   /// move to the next insertion point in O(1) time. If we don't, then we'd
     168             :   /// have to query from the root, which takes O(N) time. This would make the
     169             :   /// construction algorithm O(N^2) rather than O(N).
     170             :   SuffixTreeNode *Link = nullptr;
     171             : 
     172             :   /// The parent of this node. Every node except for the root has a parent.
     173             :   SuffixTreeNode *Parent = nullptr;
     174             : 
     175             :   /// The number of times this node's string appears in the tree.
     176             :   ///
     177             :   /// This is equal to the number of leaf children of the string. It represents
     178             :   /// the number of suffixes that the node's string is a prefix of.
     179             :   unsigned OccurrenceCount = 0;
     180             : 
     181             :   /// The length of the string formed by concatenating the edge labels from the
     182             :   /// root to this node.
     183             :   unsigned ConcatLen = 0;
     184             : 
     185             :   /// Returns true if this node is a leaf.
     186             :   bool isLeaf() const { return SuffixIdx != EmptyIdx; }
     187             : 
     188             :   /// Returns true if this node is the root of its owning \p SuffixTree.
     189             :   bool isRoot() const { return StartIdx == EmptyIdx; }
     190             : 
     191             :   /// Return the number of elements in the substring associated with this node.
     192             :   size_t size() const {
     193             : 
     194             :     // Is it the root? If so, it's the empty string so return 0.
     195        1706 :     if (isRoot())
     196             :       return 0;
     197             : 
     198             :     assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
     199             : 
     200             :     // Size = the number of elements in the string.
     201             :     // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
     202        2666 :     return *EndIdx - StartIdx + 1;
     203             :   }
     204             : 
     205             :   SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
     206             :                  SuffixTreeNode *Parent)
     207        1956 :       : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
     208             : 
     209             :   SuffixTreeNode() {}
     210             : };
     211             : 
     212             : /// A data structure for fast substring queries.
     213             : ///
     214             : /// Suffix trees represent the suffixes of their input strings in their leaves.
     215             : /// A suffix tree is a type of compressed trie structure where each node
     216             : /// represents an entire substring rather than a single character. Each leaf
     217             : /// of the tree is a suffix.
     218             : ///
     219             : /// A suffix tree can be seen as a type of state machine where each state is a
     220             : /// substring of the full string. The tree is structured so that, for a string
     221             : /// of length N, there are exactly N leaves in the tree. This structure allows
     222             : /// us to quickly find repeated substrings of the input string.
     223             : ///
     224             : /// In this implementation, a "string" is a vector of unsigned integers.
     225             : /// These integers may result from hashing some data type. A suffix tree can
     226             : /// contain 1 or many strings, which can then be queried as one large string.
     227             : ///
     228             : /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
     229             : /// suffix tree construction. Ukkonen's algorithm is explained in more detail
     230             : /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
     231             : /// paper is available at
     232             : ///
     233             : /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
     234          36 : class SuffixTree {
     235             : public:
     236             :   /// Stores each leaf node in the tree.
     237             :   ///
     238             :   /// This is used for finding outlining candidates.
     239             :   std::vector<SuffixTreeNode *> LeafVector;
     240             : 
     241             :   /// Each element is an integer representing an instruction in the module.
     242             :   ArrayRef<unsigned> Str;
     243             : 
     244             : private:
     245             :   /// Maintains each node in the tree.
     246             :   SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
     247             : 
     248             :   /// The root of the suffix tree.
     249             :   ///
     250             :   /// The root represents the empty string. It is maintained by the
     251             :   /// \p NodeAllocator like every other node in the tree.
     252             :   SuffixTreeNode *Root = nullptr;
     253             : 
     254             :   /// Maintains the end indices of the internal nodes in the tree.
     255             :   ///
     256             :   /// Each internal node is guaranteed to never have its end index change
     257             :   /// during the construction algorithm; however, leaves must be updated at
     258             :   /// every step. Therefore, we need to store leaf end indices by reference
     259             :   /// to avoid updating O(N) leaves at every step of construction. Thus,
     260             :   /// every internal node must be allocated its own end index.
     261             :   BumpPtrAllocator InternalEndIdxAllocator;
     262             : 
     263             :   /// The end index of each leaf in the tree.
     264             :   unsigned LeafEndIdx = -1;
     265             : 
     266             :   /// Helper struct which keeps track of the next insertion point in
     267             :   /// Ukkonen's algorithm.
     268          18 :   struct ActiveState {
     269             :     /// The next node to insert at.
     270             :     SuffixTreeNode *Node;
     271             : 
     272             :     /// The index of the first character in the substring currently being added.
     273             :     unsigned Idx = EmptyIdx;
     274             : 
     275             :     /// The length of the substring we have to add at the current step.
     276             :     unsigned Len = 0;
     277             :   };
     278             : 
     279             :   /// The point the next insertion will take place at in the
     280             :   /// construction algorithm.
     281             :   ActiveState Active;
     282             : 
     283             :   /// Allocate a leaf node and add it to the tree.
     284             :   ///
     285             :   /// \param Parent The parent of this node.
     286             :   /// \param StartIdx The start index of this node's associated string.
     287             :   /// \param Edge The label on the edge leaving \p Parent to this node.
     288             :   ///
     289             :   /// \returns A pointer to the allocated leaf node.
     290         764 :   SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
     291             :                              unsigned Edge) {
     292             : 
     293             :     assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
     294             : 
     295             :     SuffixTreeNode *N = new (NodeAllocator.Allocate())
     296         764 :         SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
     297        1528 :     Parent.Children[Edge] = N;
     298             : 
     299         764 :     return N;
     300             :   }
     301             : 
     302             :   /// Allocate an internal node and add it to the tree.
     303             :   ///
     304             :   /// \param Parent The parent of this node. Only null when allocating the root.
     305             :   /// \param StartIdx The start index of this node's associated string.
     306             :   /// \param EndIdx The end index of this node's associated string.
     307             :   /// \param Edge The label on the edge leaving \p Parent to this node.
     308             :   ///
     309             :   /// \returns A pointer to the allocated internal node.
     310         214 :   SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
     311             :                                      unsigned EndIdx, unsigned Edge) {
     312             : 
     313             :     assert(StartIdx <= EndIdx && "String can't start after it ends!");
     314             :     assert(!(!Parent && StartIdx != EmptyIdx) &&
     315             :            "Non-root internal nodes must have parents!");
     316             : 
     317         428 :     unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
     318             :     SuffixTreeNode *N = new (NodeAllocator.Allocate())
     319         214 :         SuffixTreeNode(StartIdx, E, Root, Parent);
     320         214 :     if (Parent)
     321         392 :       Parent->Children[Edge] = N;
     322             : 
     323         214 :     return N;
     324             :   }
     325             : 
     326             :   /// Set the suffix indices of the leaves to the start indices of their
     327             :   /// respective suffixes. Also stores each leaf in \p LeafVector at its
     328             :   /// respective suffix index.
     329             :   ///
     330             :   /// \param[in] CurrNode The node currently being visited.
     331             :   /// \param CurrIdx The current index of the string being visited.
     332         978 :   void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
     333             : 
     334         978 :     bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
     335             : 
     336             :     // Store the length of the concatenation of all strings from the root to
     337             :     // this node.
     338         978 :     if (!CurrNode.isRoot()) {
     339         960 :       if (CurrNode.ConcatLen == 0)
     340         960 :         CurrNode.ConcatLen = CurrNode.size();
     341             : 
     342         960 :       if (CurrNode.Parent)
     343         960 :         CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
     344             :     }
     345             : 
     346             :     // Traverse the tree depth-first.
     347        2916 :     for (auto &ChildPair : CurrNode.Children) {
     348             :       assert(ChildPair.second && "Node had a null child!");
     349        1920 :       setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
     350             :     }
     351             : 
     352             :     // Is this node a leaf?
     353         978 :     if (IsLeaf) {
     354             :       // If yes, give it a suffix index and bump its parent's occurrence count.
     355         764 :       CurrNode.SuffixIdx = Str.size() - CurrIdx;
     356             :       assert(CurrNode.Parent && "CurrNode had no parent!");
     357         764 :       CurrNode.Parent->OccurrenceCount++;
     358             : 
     359             :       // Store the leaf in the leaf vector for pruning later.
     360        1528 :       LeafVector[CurrNode.SuffixIdx] = &CurrNode;
     361             :     }
     362         978 :   }
     363             : 
     364             :   /// Construct the suffix tree for the prefix of the input ending at
     365             :   /// \p EndIdx.
     366             :   ///
     367             :   /// Used to construct the full suffix tree iteratively. At the end of each
     368             :   /// step, the constructed suffix tree is either a valid suffix tree, or a
     369             :   /// suffix tree with implicit suffixes. At the end of the final step, the
     370             :   /// suffix tree is a valid tree.
     371             :   ///
     372             :   /// \param EndIdx The end index of the current prefix in the main string.
     373             :   /// \param SuffixesToAdd The number of suffixes that must be added
     374             :   /// to complete the suffix tree at the current phase.
     375             :   ///
     376             :   /// \returns The number of suffixes that have not been added at the end of
     377             :   /// this step.
     378         764 :   unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
     379             :     SuffixTreeNode *NeedsLink = nullptr;
     380             : 
     381        1580 :     while (SuffixesToAdd > 0) {
     382             : 
     383             :       // Are we waiting to add anything other than just the last character?
     384        1071 :       if (Active.Len == 0) {
     385             :         // If not, then say the active index is the end index.
     386         657 :         Active.Idx = EndIdx;
     387             :       }
     388             : 
     389             :       assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
     390             : 
     391             :       // The first character in the current substring we're looking at.
     392        2142 :       unsigned FirstChar = Str[Active.Idx];
     393             : 
     394             :       // Have we inserted anything starting with FirstChar at the current node?
     395        1071 :       if (Active.Node->Children.count(FirstChar) == 0) {
     396             :         // If not, then we can just insert a leaf and move too the next step.
     397         568 :         insertLeaf(*Active.Node, EndIdx, FirstChar);
     398             : 
     399             :         // The active node is an internal node, and we visited it, so it must
     400             :         // need a link if it doesn't have one.
     401         568 :         if (NeedsLink) {
     402          54 :           NeedsLink->Link = Active.Node;
     403             :           NeedsLink = nullptr;
     404             :         }
     405             :       } else {
     406             :         // There's a match with FirstChar, so look for the point in the tree to
     407             :         // insert a new node.
     408        1006 :         SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
     409             : 
     410         503 :         unsigned SubstringLen = NextNode->size();
     411             : 
     412             :         // Is the current suffix we're trying to insert longer than the size of
     413             :         // the child we want to move to?
     414         555 :         if (Active.Len >= SubstringLen) {
     415             :           // If yes, then consume the characters we've seen and move to the next
     416             :           // node.
     417          52 :           Active.Idx += SubstringLen;
     418          52 :           Active.Len -= SubstringLen;
     419          52 :           Active.Node = NextNode;
     420          52 :           continue;
     421             :         }
     422             : 
     423             :         // Otherwise, the suffix we're trying to insert must be contained in the
     424             :         // next node we want to move to.
     425         902 :         unsigned LastChar = Str[EndIdx];
     426             : 
     427             :         // Is the string we're trying to insert a substring of the next node?
     428         902 :         if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
     429             :           // If yes, then we're done for this step. Remember our insertion point
     430             :           // and move to the next end index. At this point, we have an implicit
     431             :           // suffix tree.
     432         255 :           if (NeedsLink && !Active.Node->isRoot()) {
     433           0 :             NeedsLink->Link = Active.Node;
     434             :             NeedsLink = nullptr;
     435             :           }
     436             : 
     437         255 :           Active.Len++;
     438         255 :           break;
     439             :         }
     440             : 
     441             :         // The string we're trying to insert isn't a substring of the next node,
     442             :         // but matches up to a point. Split the node.
     443             :         //
     444             :         // For example, say we ended our search at a node n and we're trying to
     445             :         // insert ABD. Then we'll create a new node s for AB, reduce n to just
     446             :         // representing C, and insert a new leaf node l to represent d. This
     447             :         // allows us to ensure that if n was a leaf, it remains a leaf.
     448             :         //
     449             :         //   | ABC  ---split--->  | AB
     450             :         //   n                    s
     451             :         //                     C / \ D
     452             :         //                      n   l
     453             : 
     454             :         // The node s from the diagram
     455             :         SuffixTreeNode *SplitNode =
     456         196 :             insertInternalNode(Active.Node, NextNode->StartIdx,
     457         196 :                                NextNode->StartIdx + Active.Len - 1, FirstChar);
     458             : 
     459             :         // Insert the new node representing the new substring into the tree as
     460             :         // a child of the split node. This is the node l from the diagram.
     461         196 :         insertLeaf(*SplitNode, EndIdx, LastChar);
     462             : 
     463             :         // Make the old node a child of the split node and update its start
     464             :         // index. This is the node n from the diagram.
     465         196 :         NextNode->StartIdx += Active.Len;
     466         196 :         NextNode->Parent = SplitNode;
     467         588 :         SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
     468             : 
     469             :         // SplitNode is an internal node, update the suffix link.
     470         196 :         if (NeedsLink)
     471         138 :           NeedsLink->Link = SplitNode;
     472             : 
     473             :         NeedsLink = SplitNode;
     474             :       }
     475             : 
     476             :       // We've added something new to the tree, so there's one less suffix to
     477             :       // add.
     478         764 :       SuffixesToAdd--;
     479             : 
     480         764 :       if (Active.Node->isRoot()) {
     481         680 :         if (Active.Len > 0) {
     482         171 :           Active.Len--;
     483         171 :           Active.Idx = EndIdx - SuffixesToAdd + 1;
     484             :         }
     485             :       } else {
     486             :         // Start the next phase at the next smallest suffix.
     487          84 :         Active.Node = Active.Node->Link;
     488             :       }
     489             :     }
     490             : 
     491         764 :     return SuffixesToAdd;
     492             :   }
     493             : 
     494             : public:
     495             :   /// Construct a suffix tree from a sequence of unsigned integers.
     496             :   ///
     497             :   /// \param Str The string to construct the suffix tree for.
     498          54 :   SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
     499          18 :     Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
     500          18 :     Root->IsInTree = true;
     501          18 :     Active.Node = Root;
     502          54 :     LeafVector = std::vector<SuffixTreeNode *>(Str.size());
     503             : 
     504             :     // Keep track of the number of suffixes we have to add of the current
     505             :     // prefix.
     506             :     unsigned SuffixesToAdd = 0;
     507          18 :     Active.Node = Root;
     508             : 
     509             :     // Construct the suffix tree iteratively on each prefix of the string.
     510             :     // PfxEndIdx is the end index of the current prefix.
     511             :     // End is one past the last element in the string.
     512         800 :     for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
     513             :          PfxEndIdx++) {
     514         764 :       SuffixesToAdd++;
     515         764 :       LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
     516         764 :       SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
     517             :     }
     518             : 
     519             :     // Set the suffix indices of each leaf.
     520             :     assert(Root && "Root node can't be nullptr!");
     521          18 :     setSuffixIndices(*Root, 0);
     522          18 :   }
     523             : };
     524             : 
     525             : /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
     526          54 : struct InstructionMapper {
     527             : 
     528             :   /// The next available integer to assign to a \p MachineInstr that
     529             :   /// cannot be outlined.
     530             :   ///
     531             :   /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
     532             :   unsigned IllegalInstrNumber = -3;
     533             : 
     534             :   /// The next available integer to assign to a \p MachineInstr that can
     535             :   /// be outlined.
     536             :   unsigned LegalInstrNumber = 0;
     537             : 
     538             :   /// Correspondence from \p MachineInstrs to unsigned integers.
     539             :   DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
     540             :       InstructionIntegerMap;
     541             : 
     542             :   /// Corresponcence from unsigned integers to \p MachineInstrs.
     543             :   /// Inverse of \p InstructionIntegerMap.
     544             :   DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
     545             : 
     546             :   /// The vector of unsigned integers that the module is mapped to.
     547             :   std::vector<unsigned> UnsignedVec;
     548             : 
     549             :   /// Stores the location of the instruction associated with the integer
     550             :   /// at index i in \p UnsignedVec for each index i.
     551             :   std::vector<MachineBasicBlock::iterator> InstrList;
     552             : 
     553             :   /// Maps \p *It to a legal integer.
     554             :   ///
     555             :   /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
     556             :   /// \p IntegerInstructionMap, and \p LegalInstrNumber.
     557             :   ///
     558             :   /// \returns The integer that \p *It was mapped to.
     559         442 :   unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
     560             : 
     561             :     // Get the integer for this instruction or give it the current
     562             :     // LegalInstrNumber.
     563         442 :     InstrList.push_back(It);
     564             :     MachineInstr &MI = *It;
     565             :     bool WasInserted;
     566             :     DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
     567             :         ResultIt;
     568             :     std::tie(ResultIt, WasInserted) =
     569         884 :         InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
     570         442 :     unsigned MINumber = ResultIt->second;
     571             : 
     572             :     // There was an insertion.
     573         442 :     if (WasInserted) {
     574         187 :       LegalInstrNumber++;
     575         374 :       IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
     576             :     }
     577             : 
     578         442 :     UnsignedVec.push_back(MINumber);
     579             : 
     580             :     // Make sure we don't overflow or use any integers reserved by the DenseMap.
     581         442 :     if (LegalInstrNumber >= IllegalInstrNumber)
     582           0 :       report_fatal_error("Instruction mapping overflow!");
     583             : 
     584             :     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
     585             :            "Tried to assign DenseMap tombstone or empty key to instruction.");
     586             :     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
     587             :            "Tried to assign DenseMap tombstone or empty key to instruction.");
     588             : 
     589         442 :     return MINumber;
     590             :   }
     591             : 
     592             :   /// Maps \p *It to an illegal integer.
     593             :   ///
     594             :   /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
     595             :   ///
     596             :   /// \returns The integer that \p *It was mapped to.
     597         254 :   unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
     598         254 :     unsigned MINumber = IllegalInstrNumber;
     599             : 
     600         254 :     InstrList.push_back(It);
     601         254 :     UnsignedVec.push_back(IllegalInstrNumber);
     602         254 :     IllegalInstrNumber--;
     603             : 
     604             :     assert(LegalInstrNumber < IllegalInstrNumber &&
     605             :            "Instruction mapping overflow!");
     606             : 
     607             :     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
     608             :            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
     609             : 
     610             :     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
     611             :            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
     612             : 
     613         254 :     return MINumber;
     614             :   }
     615             : 
     616             :   /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
     617             :   /// and appends it to \p UnsignedVec and \p InstrList.
     618             :   ///
     619             :   /// Two instructions are assigned the same integer if they are identical.
     620             :   /// If an instruction is deemed unsafe to outline, then it will be assigned an
     621             :   /// unique integer. The resulting mapping is placed into a suffix tree and
     622             :   /// queried for candidates.
     623             :   ///
     624             :   /// \param MBB The \p MachineBasicBlock to be translated into integers.
     625             :   /// \param TRI \p TargetRegisterInfo for the module.
     626             :   /// \param TII \p TargetInstrInfo for the module.
     627          62 :   void convertToUnsignedVec(MachineBasicBlock &MBB,
     628             :                             const TargetRegisterInfo &TRI,
     629             :                             const TargetInstrInfo &TII) {
     630          62 :     unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
     631             : 
     632         822 :     for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
     633             :          It++) {
     634             : 
     635             :       // Keep track of where this instruction is in the module.
     636         698 :       switch (TII.getOutliningType(It, Flags)) {
     637         254 :       case InstrType::Illegal:
     638         254 :         mapToIllegalUnsigned(It);
     639             :         break;
     640             : 
     641         436 :       case InstrType::Legal:
     642         436 :         mapToLegalUnsigned(It);
     643             :         break;
     644             : 
     645           6 :       case InstrType::LegalTerminator:
     646           6 :         mapToLegalUnsigned(It);
     647           6 :         InstrList.push_back(It);
     648           6 :         UnsignedVec.push_back(IllegalInstrNumber);
     649           6 :         IllegalInstrNumber--;
     650             :         break;
     651             : 
     652             :       case InstrType::Invisible:
     653             :         break;
     654             :       }
     655             :     }
     656             : 
     657             :     // After we're done every insertion, uniquely terminate this part of the
     658             :     // "string". This makes sure we won't match across basic block or function
     659             :     // boundaries since the "end" is encoded uniquely and thus appears in no
     660             :     // repeated substring.
     661         124 :     InstrList.push_back(MBB.end());
     662          62 :     UnsignedVec.push_back(IllegalInstrNumber);
     663          62 :     IllegalInstrNumber--;
     664          62 :   }
     665             : 
     666          36 :   InstructionMapper() {
     667             :     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
     668             :     // changed.
     669             :     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
     670             :            "DenseMapInfo<unsigned>'s empty key isn't -1!");
     671             :     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
     672             :            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
     673          18 :   }
     674             : };
     675             : 
     676             : /// An interprocedural pass which finds repeated sequences of
     677             : /// instructions and replaces them with calls to functions.
     678             : ///
     679             : /// Each instruction is mapped to an unsigned integer and placed in a string.
     680             : /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
     681             : /// is then repeatedly queried for repeated sequences of instructions. Each
     682             : /// non-overlapping repeated sequence is then placed in its own
     683             : /// \p MachineFunction and each instance is then replaced with a call to that
     684             : /// function.
     685          54 : struct MachineOutliner : public ModulePass {
     686             : 
     687             :   static char ID;
     688             : 
     689             :   /// Set to true if the outliner should consider functions with
     690             :   /// linkonceodr linkage.
     691             :   bool OutlineFromLinkOnceODRs = false;
     692             : 
     693             :   // Collection of IR functions created by the outliner.
     694             :   std::vector<Function *> CreatedIRFunctions;
     695             : 
     696          19 :   StringRef getPassName() const override { return "Machine Outliner"; }
     697             : 
     698          18 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     699             :     AU.addRequired<MachineModuleInfo>();
     700             :     AU.addPreserved<MachineModuleInfo>();
     701             :     AU.setPreservesAll();
     702          18 :     ModulePass::getAnalysisUsage(AU);
     703          18 :   }
     704             : 
     705          36 :   MachineOutliner() : ModulePass(ID) {
     706          18 :     initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
     707          18 :   }
     708             : 
     709             :   /// Find all repeated substrings that satisfy the outlining cost model.
     710             :   ///
     711             :   /// If a substring appears at least twice, then it must be represented by
     712             :   /// an internal node which appears in at least two suffixes. Each suffix is
     713             :   /// represented by a leaf node. To do this, we visit each internal node in
     714             :   /// the tree, using the leaf children of each internal node. If an internal
     715             :   /// node represents a beneficial substring, then we use each of its leaf
     716             :   /// children to find the locations of its substring.
     717             :   ///
     718             :   /// \param ST A suffix tree to query.
     719             :   /// \param TII TargetInstrInfo for the target.
     720             :   /// \param Mapper Contains outlining mapping information.
     721             :   /// \param[out] CandidateList Filled with candidates representing each
     722             :   /// beneficial substring.
     723             :   /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
     724             :   /// type of candidate.
     725             :   ///
     726             :   /// \returns The length of the longest candidate found.
     727             :   unsigned
     728             :   findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
     729             :                  InstructionMapper &Mapper,
     730             :                  std::vector<std::shared_ptr<Candidate>> &CandidateList,
     731             :                  std::vector<OutlinedFunction> &FunctionList);
     732             : 
     733             :   /// Replace the sequences of instructions represented by the
     734             :   /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
     735             :   /// described in \p FunctionList.
     736             :   ///
     737             :   /// \param M The module we are outlining from.
     738             :   /// \param CandidateList A list of candidates to be outlined.
     739             :   /// \param FunctionList A list of functions to be inserted into the module.
     740             :   /// \param Mapper Contains the instruction mappings for the module.
     741             :   bool outline(Module &M,
     742             :                const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
     743             :                std::vector<OutlinedFunction> &FunctionList,
     744             :                InstructionMapper &Mapper);
     745             : 
     746             :   /// Creates a function for \p OF and inserts it into the module.
     747             :   MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
     748             :                                           InstructionMapper &Mapper);
     749             : 
     750             :   /// Find potential outlining candidates and store them in \p CandidateList.
     751             :   ///
     752             :   /// For each type of potential candidate, also build an \p OutlinedFunction
     753             :   /// struct containing the information to build the function for that
     754             :   /// candidate.
     755             :   ///
     756             :   /// \param[out] CandidateList Filled with outlining candidates for the module.
     757             :   /// \param[out] FunctionList Filled with functions corresponding to each type
     758             :   /// of \p Candidate.
     759             :   /// \param ST The suffix tree for the module.
     760             :   /// \param TII TargetInstrInfo for the module.
     761             :   ///
     762             :   /// \returns The length of the longest candidate found. 0 if there are none.
     763             :   unsigned
     764             :   buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
     765             :                      std::vector<OutlinedFunction> &FunctionList,
     766             :                      SuffixTree &ST, InstructionMapper &Mapper,
     767             :                      const TargetInstrInfo &TII);
     768             : 
     769             :   /// Helper function for pruneOverlaps.
     770             :   /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
     771             :   void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
     772             : 
     773             :   /// Remove any overlapping candidates that weren't handled by the
     774             :   /// suffix tree's pruning method.
     775             :   ///
     776             :   /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
     777             :   /// If a short candidate is chosen for outlining, then a longer candidate
     778             :   /// which has that short candidate as a suffix is chosen, the tree's pruning
     779             :   /// method will not find it. Thus, we need to prune before outlining as well.
     780             :   ///
     781             :   /// \param[in,out] CandidateList A list of outlining candidates.
     782             :   /// \param[in,out] FunctionList A list of functions to be outlined.
     783             :   /// \param Mapper Contains instruction mapping info for outlining.
     784             :   /// \param MaxCandidateLen The length of the longest candidate.
     785             :   /// \param TII TargetInstrInfo for the module.
     786             :   void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
     787             :                      std::vector<OutlinedFunction> &FunctionList,
     788             :                      InstructionMapper &Mapper, unsigned MaxCandidateLen,
     789             :                      const TargetInstrInfo &TII);
     790             : 
     791             :   /// Construct a suffix tree on the instructions in \p M and outline repeated
     792             :   /// strings from that tree.
     793             :   bool runOnModule(Module &M) override;
     794             : 
     795             :   /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
     796             :   /// function for remark emission.
     797          21 :   DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
     798             :     DISubprogram *SP;
     799          21 :     for (const std::shared_ptr<Candidate> &C : OF.Candidates)
     800          82 :       if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
     801             :         return SP;
     802             :     return nullptr;
     803             :   }
     804             : };
     805             : 
     806             : } // Anonymous namespace.
     807             : 
     808             : char MachineOutliner::ID = 0;
     809             : 
     810             : namespace llvm {
     811          14 : ModulePass *createMachineOutlinerPass() {
     812          14 :   return new MachineOutliner();
     813             : }
     814             : 
     815             : } // namespace llvm
     816             : 
     817      148148 : INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
     818             :                 false)
     819             : 
     820          18 : unsigned MachineOutliner::findCandidates(
     821             :     SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
     822             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
     823             :     std::vector<OutlinedFunction> &FunctionList) {
     824          18 :   CandidateList.clear();
     825             :   FunctionList.clear();
     826             :   unsigned MaxLen = 0;
     827             : 
     828             :   // FIXME: Visit internal nodes instead of leaves.
     829         782 :   for (SuffixTreeNode *Leaf : ST.LeafVector) {
     830             :     assert(Leaf && "Leaves in LeafVector cannot be null!");
     831         764 :     if (!Leaf->IsInTree)
     832         701 :       continue;
     833             : 
     834             :     assert(Leaf->Parent && "All leaves must have parents!");
     835         609 :     SuffixTreeNode &Parent = *(Leaf->Parent);
     836             : 
     837             :     // If it doesn't appear enough, or we already outlined from it, skip it.
     838         609 :     if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
     839             :       continue;
     840             : 
     841             :     // Figure out if this candidate is beneficial.
     842         486 :     unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
     843             : 
     844             :     // Too short to be beneficial; skip it.
     845             :     // FIXME: This isn't necessarily true for, say, X86. If we factor in
     846             :     // instruction lengths we need more information than this.
     847         243 :     if (StringLen < 2)
     848             :       continue;
     849             : 
     850             :     // If this is a beneficial class of candidate, then every one is stored in
     851             :     // this vector.
     852             :     std::vector<Candidate> CandidatesForRepeatedSeq;
     853             : 
     854             :     // Figure out the call overhead for each instance of the sequence.
     855         539 :     for (auto &ChildPair : Parent.Children) {
     856         285 :       SuffixTreeNode *M = ChildPair.second;
     857             : 
     858         285 :       if (M && M->IsInTree && M->isLeaf()) {
     859             :         // Never visit this leaf again.
     860         282 :         M->IsInTree = false;
     861         282 :         unsigned StartIdx = M->SuffixIdx;
     862         282 :         unsigned EndIdx = StartIdx + StringLen - 1;
     863             : 
     864             :         // Trick: Discard some candidates that would be incompatible with the
     865             :         // ones we've already found for this sequence. This will save us some
     866             :         // work in candidate selection.
     867             :         //
     868             :         // If two candidates overlap, then we can't outline them both. This
     869             :         // happens when we have candidates that look like, say
     870             :         //
     871             :         // AA (where each "A" is an instruction).
     872             :         //
     873             :         // We might have some portion of the module that looks like this:
     874             :         // AAAAAA (6 A's) 
     875             :         //
     876             :         // In this case, there are 5 different copies of "AA" in this range, but
     877             :         // at most 3 can be outlined. If only outlining 3 of these is going to
     878             :         // be unbeneficial, then we ought to not bother.
     879             :         //
     880             :         // Note that two things DON'T overlap when they look like this:
     881             :         // start1...end1 .... start2...end2
     882             :         // That is, one must either
     883             :         // * End before the other starts
     884             :         // * Start after the other ends
     885         282 :         if (std::all_of(CandidatesForRepeatedSeq.begin(),
     886             :                         CandidatesForRepeatedSeq.end(),
     887             :                         [&StartIdx, &EndIdx](const Candidate &C) {
     888         274 :                           return (EndIdx < C.getStartIdx() ||
     889          89 :                                   StartIdx > C.getEndIdx());
     890             :                         })) {
     891             :           // It doesn't overlap with anything, so we can outline it.
     892             :           // Each sequence is over [StartIt, EndIt].
     893             :           // Save the candidate and its location.
     894             : 
     895         564 :           MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
     896         564 :           MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
     897             : 
     898         282 :           CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
     899         564 :                                                 EndIt, StartIt->getParent(),
     900         846 :                                                 FunctionList.size());
     901             :         }
     902             :       }
     903             :     }
     904             : 
     905             :     // We've found something we might want to outline.
     906             :     // Create an OutlinedFunction to store it and check if it'd be beneficial
     907             :     // to outline.
     908             :     TargetCostInfo TCI =
     909         127 :         TII.getOutlininingCandidateInfo(CandidatesForRepeatedSeq);
     910             :     std::vector<unsigned> Seq;
     911         794 :     for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
     912        1334 :       Seq.push_back(ST.Str[i]);
     913         254 :     OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
     914         317 :                         Seq, TCI);
     915             :     unsigned Benefit = OF.getBenefit();
     916             : 
     917             :     // Is it better to outline this candidate than not?
     918          84 :     if (Benefit < 1) {
     919             :       // Outlining this candidate would take more instructions than not
     920             :       // outlining.
     921             :       // Emit a remark explaining why we didn't outline this candidate.
     922             :       Candidate &C = CandidatesForRepeatedSeq.front();
     923          64 :       MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
     924          78 :       MORE.emit([&]() {
     925             :         MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
     926          42 :                                           C.front()->getDebugLoc(), C.getMBB());
     927          42 :         R << "Did not outline " << NV("Length", StringLen) << " instructions"
     928          70 :           << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
     929             :           << " locations."
     930             :           << " Bytes from outlining all occurrences ("
     931          56 :           << NV("OutliningCost", OF.getOutliningCost()) << ")"
     932             :           << " >= Unoutlined instruction bytes ("
     933          42 :           << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
     934          56 :           << " (Also found at: ";
     935             : 
     936             :         // Tell the user the other places the candidate was found.
     937          42 :         for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
     938          56 :           R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
     939          28 :                   CandidatesForRepeatedSeq[i].front()->getDebugLoc());
     940          14 :           if (i != e - 1)
     941             :             R << ", ";
     942             :         }
     943             : 
     944             :         R << ")";
     945          14 :         return R;
     946             :       });
     947             : 
     948             :       // Move to the next candidate.
     949          64 :       continue;
     950             :     }
     951             : 
     952          63 :     if (StringLen > MaxLen)
     953             :       MaxLen = StringLen;
     954             : 
     955             :     // At this point, the candidate class is seen as beneficial. Set their
     956             :     // benefit values and save them in the candidate list.
     957          63 :     std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
     958         207 :     for (Candidate &C : CandidatesForRepeatedSeq) {
     959         144 :       C.Benefit = Benefit;
     960         144 :       C.TCI = TCI;
     961             :       std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
     962         144 :       CandidateList.push_back(Cptr);
     963         144 :       CandidatesForFn.push_back(Cptr);
     964             :     }
     965             : 
     966          63 :     FunctionList.push_back(OF);
     967          63 :     FunctionList.back().Candidates = CandidatesForFn;
     968             : 
     969             :     // Move to the next function.
     970          63 :     Parent.IsInTree = false;
     971             :   }
     972             : 
     973          18 :   return MaxLen;
     974             : }
     975             : 
     976             : // Remove C from the candidate space, and update its OutlinedFunction.
     977             : void MachineOutliner::prune(Candidate &C,
     978             :                             std::vector<OutlinedFunction> &FunctionList) {
     979             :   // Get the OutlinedFunction associated with this Candidate.
     980          47 :   OutlinedFunction &F = FunctionList[C.FunctionIdx];
     981             : 
     982             :   // Update C's associated function's occurrence count.
     983             :   F.decrement();
     984             : 
     985             :   // Remove C from the CandidateList.
     986          96 :   C.InCandidateList = false;
     987             : 
     988             :   LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
     989             :              dbgs() << "--- Num fns left for candidate: "
     990             :                     << F.getOccurrenceCount() << "\n";
     991             :              dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
     992             :                     << "\n";);
     993             : }
     994             : 
     995          18 : void MachineOutliner::pruneOverlaps(
     996             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
     997             :     std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
     998             :     unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
     999             : 
    1000             :   // Return true if this candidate became unbeneficial for outlining in a
    1001             :   // previous step.
    1002         407 :   auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
    1003             : 
    1004             :     // Check if the candidate was removed in a previous step.
    1005         213 :     if (!C.InCandidateList)
    1006             :       return true;
    1007             : 
    1008             :     // C must be alive. Check if we should remove it.
    1009         537 :     if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
    1010             :       prune(C, FunctionList);
    1011             :       return true;
    1012             :     }
    1013             : 
    1014             :     // C is in the list, and F is still beneficial.
    1015             :     return false;
    1016          18 :   };
    1017             : 
    1018             :   // TODO: Experiment with interval trees or other interval-checking structures
    1019             :   // to lower the time complexity of this function.
    1020             :   // TODO: Can we do better than the simple greedy choice?
    1021             :   // Check for overlaps in the range.
    1022             :   // This is O(MaxCandidateLen * CandidateList.size()).
    1023         162 :   for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
    1024             :        It++) {
    1025             :     Candidate &C1 = **It;
    1026             : 
    1027             :     // If C1 was already pruned, or its function is no longer beneficial for
    1028             :     // outlining, move to the next candidate.
    1029         144 :     if (ShouldSkipCandidate(C1))
    1030             :       continue;
    1031             : 
    1032             :     // The minimum start index of any candidate that could overlap with this
    1033             :     // one.
    1034             :     unsigned FarthestPossibleIdx = 0;
    1035             : 
    1036             :     // Either the index is 0, or it's at most MaxCandidateLen indices away.
    1037          95 :     if (C1.getStartIdx() > MaxCandidateLen)
    1038          83 :       FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
    1039             : 
    1040             :     // Compare against the candidates in the list that start at most
    1041             :     // FarthestPossibleIdx indices away from C1. There are at most
    1042             :     // MaxCandidateLen of these.
    1043         117 :     for (auto Sit = It + 1; Sit != Et; Sit++) {
    1044             :       Candidate &C2 = **Sit;
    1045             : 
    1046             :       // Is this candidate too far away to overlap?
    1047         100 :       if (C2.getStartIdx() < FarthestPossibleIdx)
    1048             :         break;
    1049             : 
    1050             :       // If C2 was already pruned, or its function is no longer beneficial for
    1051             :       // outlining, move to the next candidate.
    1052          69 :       if (ShouldSkipCandidate(C2))
    1053             :         continue;
    1054             : 
    1055             :       // Do C1 and C2 overlap?
    1056             :       //
    1057             :       // Not overlapping:
    1058             :       // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
    1059             :       //
    1060             :       // We sorted our candidate list so C2Start <= C1Start. We know that
    1061             :       // C2End > C2Start since each candidate has length >= 2. Therefore, all we
    1062             :       // have to check is C2End < C2Start to see if we overlap.
    1063         100 :       if (C2.getEndIdx() < C1.getStartIdx())
    1064             :         continue;
    1065             : 
    1066             :       // C1 and C2 overlap.
    1067             :       // We need to choose the better of the two.
    1068             :       //
    1069             :       // Approximate this by picking the one which would have saved us the
    1070             :       // most instructions before any pruning.
    1071             : 
    1072             :       // Is C2 a better candidate?
    1073          47 :       if (C2.Benefit > C1.Benefit) {
    1074             :         // Yes, so prune C1. Since C1 is dead, we don't have to compare it
    1075             :         // against anything anymore, so break.
    1076          47 :         prune(C1, FunctionList);
    1077             :         break;
    1078             :       }
    1079             : 
    1080             :       // Prune C2 and move on to the next candidate.
    1081           0 :       prune(C2, FunctionList);
    1082             :     }
    1083             :   }
    1084          18 : }
    1085             : 
    1086          18 : unsigned MachineOutliner::buildCandidateList(
    1087             :     std::vector<std::shared_ptr<Candidate>> &CandidateList,
    1088             :     std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
    1089             :     InstructionMapper &Mapper, const TargetInstrInfo &TII) {
    1090             : 
    1091             :   std::vector<unsigned> CandidateSequence; // Current outlining candidate.
    1092             :   unsigned MaxCandidateLen = 0;            // Length of the longest candidate.
    1093             : 
    1094          18 :   MaxCandidateLen =
    1095             :       findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
    1096             : 
    1097             :   // Sort the candidates in decending order. This will simplify the outlining
    1098             :   // process when we have to remove the candidates from the mapping by
    1099             :   // allowing us to cut them out without keeping track of an offset.
    1100             :   std::stable_sort(
    1101             :       CandidateList.begin(), CandidateList.end(),
    1102             :       [](const std::shared_ptr<Candidate> &LHS,
    1103             :          const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
    1104             : 
    1105          18 :   return MaxCandidateLen;
    1106             : }
    1107             : 
    1108             : MachineFunction *
    1109          21 : MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
    1110             :                                         InstructionMapper &Mapper) {
    1111             : 
    1112             :   // Create the function name. This should be unique. For now, just hash the
    1113             :   // module name and include it in the function name plus the number of this
    1114             :   // function.
    1115          42 :   std::ostringstream NameStream;
    1116          21 :   NameStream << "OUTLINED_FUNCTION_" << OF.Name;
    1117             : 
    1118             :   // Create the function using an IR-level function.
    1119          21 :   LLVMContext &C = M.getContext();
    1120          21 :   Function *F = dyn_cast<Function>(
    1121          42 :       M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
    1122             :   assert(F && "Function was null!");
    1123             : 
    1124             :   // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
    1125             :   // which gives us better results when we outline from linkonceodr functions.
    1126          21 :   F->setLinkage(GlobalValue::InternalLinkage);
    1127             :   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    1128             : 
    1129             :   // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
    1130             :   // necessary.
    1131             : 
    1132             :   // Set optsize/minsize, so we don't insert padding between outlined
    1133             :   // functions.
    1134             :   F->addFnAttr(Attribute::OptimizeForSize);
    1135          21 :   F->addFnAttr(Attribute::MinSize);
    1136             : 
    1137             :   // Save F so that we can add debug info later if we need to.
    1138          21 :   CreatedIRFunctions.push_back(F);
    1139             : 
    1140          42 :   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
    1141             :   IRBuilder<> Builder(EntryBB);
    1142          21 :   Builder.CreateRetVoid();
    1143             : 
    1144          21 :   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
    1145          21 :   MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
    1146          21 :   MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
    1147          21 :   const TargetSubtargetInfo &STI = MF.getSubtarget();
    1148          21 :   const TargetInstrInfo &TII = *STI.getInstrInfo();
    1149             : 
    1150             :   // Insert the new function into the module.
    1151             :   MF.insert(MF.begin(), &MBB);
    1152             : 
    1153          21 :   TII.insertOutlinerPrologue(MBB, MF, OF.TCI);
    1154             : 
    1155             :   // Copy over the instructions for the function using the integer mappings in
    1156             :   // its sequence.
    1157          21 :   for (unsigned Str : OF.Sequence) {
    1158             :     MachineInstr *NewMI =
    1159         143 :         MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
    1160             :     NewMI->dropMemRefs();
    1161             : 
    1162             :     // Don't keep debug information for outlined instructions.
    1163         286 :     NewMI->setDebugLoc(DebugLoc());
    1164             :     MBB.insert(MBB.end(), NewMI);
    1165             :   }
    1166             : 
    1167          21 :   TII.insertOutlinerEpilogue(MBB, MF, OF.TCI);
    1168             : 
    1169             :   // If there's a DISubprogram associated with this outlined function, then
    1170             :   // emit debug info for the outlined function.
    1171          21 :   if (DISubprogram *SP = getSubprogramOrNull(OF)) {
    1172             :     // We have a DISubprogram. Get its DICompileUnit.
    1173             :     DICompileUnit *CU = SP->getUnit();
    1174          10 :     DIBuilder DB(M, true, CU);
    1175             :     DIFile *Unit = SP->getFile();
    1176             :     Mangler Mg;
    1177             : 
    1178             :     // Walk over each IR function we created in the outliner and create
    1179             :     // DISubprograms for each function.
    1180          11 :     for (Function *F : CreatedIRFunctions) {
    1181             :       // Get the mangled name of the function for the linkage name.
    1182             :       std::string Dummy;
    1183           6 :       llvm::raw_string_ostream MangledNameStream(Dummy);
    1184           6 :       Mg.getNameWithPrefix(MangledNameStream, F, false);
    1185             : 
    1186          18 :       DISubprogram *SP = DB.createFunction(
    1187             :           Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
    1188             :           Unit /* File */,
    1189             :           0 /* Line 0 is reserved for compiler-generated code. */,
    1190             :           DB.createSubroutineType(
    1191             :               DB.getOrCreateTypeArray(None)), /* void type */
    1192             :           false, true, 0, /* Line 0 is reserved for compiler-generated code. */
    1193             :           DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
    1194           6 :           true /* Outlined code is optimized code by definition. */);
    1195             : 
    1196             :       // Don't add any new variables to the subprogram.
    1197           6 :       DB.finalizeSubprogram(SP);
    1198             : 
    1199             :       // Attach subprogram to the function.
    1200           6 :       F->setSubprogram(SP);
    1201             :     }
    1202             : 
    1203             :     // We're done with the DIBuilder.
    1204           5 :     DB.finalize();
    1205             :   }
    1206             : 
    1207             :   // Outlined functions shouldn't preserve liveness.
    1208             :   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
    1209          21 :   MF.getRegInfo().freezeReservedRegs(MF);
    1210          21 :   return &MF;
    1211             : }
    1212             : 
    1213          18 : bool MachineOutliner::outline(
    1214             :     Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
    1215             :     std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
    1216             : 
    1217             :   bool OutlinedSomething = false;
    1218             :   // Replace the candidates with calls to their respective outlined functions.
    1219         324 :   for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
    1220             :     Candidate &C = *Cptr;
    1221             :     // Was the candidate removed during pruneOverlaps?
    1222         144 :     if (!C.InCandidateList)
    1223         192 :       continue;
    1224             : 
    1225             :     // If not, then look at its OutlinedFunction.
    1226          48 :     OutlinedFunction &OF = FunctionList[C.FunctionIdx];
    1227             : 
    1228             :     // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
    1229          48 :     if (OF.getBenefit() < 1)
    1230           0 :       continue;
    1231             : 
    1232             :     // Does this candidate have a function yet?
    1233          48 :     if (!OF.MF) {
    1234          21 :       OF.MF = createOutlinedFunction(M, OF, Mapper);
    1235             :       MachineBasicBlock *MBB = &*OF.MF->begin();
    1236             : 
    1237             :       // Output a remark telling the user that an outlined function was created,
    1238             :       // and explaining where it came from.
    1239             :       MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
    1240             :       MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
    1241          63 :                                   MBB->findDebugLoc(MBB->begin()), MBB);
    1242          42 :       R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
    1243             :         << " bytes by "
    1244          63 :         << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
    1245          63 :         << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
    1246             :         << " locations. "
    1247          63 :         << "(Found at: ";
    1248             : 
    1249             :       // Tell the user the other places the candidate was found.
    1250         138 :       for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
    1251             : 
    1252             :         // Skip over things that were pruned.
    1253          96 :         if (!OF.Candidates[i]->InCandidateList)
    1254           0 :           continue;
    1255             : 
    1256          96 :         R << NV(
    1257          96 :             (Twine("StartLoc") + Twine(i)).str(),
    1258             :             OF.Candidates[i]->front()->getDebugLoc());
    1259          48 :         if (i != e - 1)
    1260             :           R << ", ";
    1261             :       }
    1262             : 
    1263             :       R << ")";
    1264             : 
    1265          21 :       MORE.emit(R);
    1266             :       FunctionsCreated++;
    1267             :     }
    1268             : 
    1269          48 :     MachineFunction *MF = OF.MF;
    1270          48 :     MachineBasicBlock &MBB = *C.getMBB();
    1271          48 :     MachineBasicBlock::iterator StartIt = C.front();
    1272          48 :     MachineBasicBlock::iterator EndIt = C.back();
    1273             :     assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
    1274             :     assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
    1275             : 
    1276          48 :     const TargetSubtargetInfo &STI = MF->getSubtarget();
    1277          48 :     const TargetInstrInfo &TII = *STI.getInstrInfo();
    1278             : 
    1279             :     // Insert a call to the new function and erase the old sequence.
    1280          48 :     auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C.TCI);
    1281             : 
    1282             :     // If the caller tracks liveness, then we need to make sure that anything
    1283             :     // we outline doesn't break liveness assumptions.
    1284             :     // The outlined functions themselves currently don't track liveness, but
    1285             :     // we should make sure that the ranges we yank things out of aren't
    1286             :     // wrong.
    1287          96 :     if (MBB.getParent()->getProperties().hasProperty(
    1288             :             MachineFunctionProperties::Property::TracksLiveness)) {
    1289             :       // Helper lambda for adding implicit def operands to the call instruction.
    1290         568 :       auto CopyDefs = [&CallInst](MachineInstr &MI) {
    1291        2669 :         for (MachineOperand &MOP : MI.operands()) {
    1292             :           // Skip over anything that isn't a register.
    1293        1166 :           if (!MOP.isReg())
    1294         445 :             continue;
    1295             : 
    1296             :           // If it's a def, add it to the call instruction.
    1297         721 :           if (MOP.isDef())
    1298         231 :             CallInst->addOperand(
    1299         231 :                 MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
    1300         231 :                                           true /* isImp = true */));
    1301             :         }
    1302         385 :       };
    1303             : 
    1304             :       // Copy over the defs in the outlined range.
    1305             :       // First inst in outlined range <-- Anything that's defined in this
    1306             :       // ...                           .. range has to be added as an implicit
    1307             :       // Last inst in outlined range  <-- def to the call instruction.
    1308          48 :       std::for_each(CallInst, EndIt, CopyDefs);
    1309             :     }
    1310             : 
    1311             :     // Erase from the point after where the call was inserted up to, and
    1312             :     // including, the final instruction in the sequence.
    1313             :     // Erase needs one past the end, so we need std::next there too.
    1314             :     MBB.erase(std::next(StartIt), std::next(EndIt));
    1315             :     OutlinedSomething = true;
    1316             : 
    1317             :     // Statistics.
    1318             :     NumOutlined++;
    1319             :   }
    1320             : 
    1321             :   LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
    1322             : 
    1323          18 :   return OutlinedSomething;
    1324             : }
    1325             : 
    1326          18 : bool MachineOutliner::runOnModule(Module &M) {
    1327             :   // Check if there's anything in the module. If it's empty, then there's
    1328             :   // nothing to outline.
    1329          18 :   if (M.empty())
    1330             :     return false;
    1331             : 
    1332          18 :   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
    1333             :   const TargetSubtargetInfo &STI =
    1334          18 :       MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
    1335          18 :   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
    1336          18 :   const TargetInstrInfo *TII = STI.getInstrInfo();
    1337             : 
    1338             :   // Does the target implement the MachineOutliner? If it doesn't, quit here.
    1339          18 :   if (!TII->useMachineOutliner()) {
    1340             :     // No. So we're done.
    1341             :     LLVM_DEBUG(
    1342             :         dbgs()
    1343             :         << "Skipping pass: Target does not support the MachineOutliner.\n");
    1344             :     return false;
    1345             :   }
    1346             : 
    1347             :   // If the user specifies that they want to outline from linkonceodrs, set
    1348             :   // it here.
    1349          18 :   OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
    1350             : 
    1351          36 :   InstructionMapper Mapper;
    1352             : 
    1353             :   // Build instruction mappings for each function in the module. Start by
    1354             :   // iterating over each Function in M.
    1355          94 :   for (Function &F : M) {
    1356             : 
    1357             :     // If there's nothing in F, then there's no reason to try and outline from
    1358             :     // it.
    1359          76 :     if (F.empty())
    1360          20 :       continue;
    1361             : 
    1362             :     // There's something in F. Check if it has a MachineFunction associated with
    1363             :     // it.
    1364          56 :     MachineFunction *MF = MMI.getMachineFunction(F);
    1365             : 
    1366             :     // If it doesn't, then there's nothing to outline from. Move to the next
    1367             :     // Function.
    1368          56 :     if (!MF)
    1369           0 :       continue;
    1370             : 
    1371             :     // We have a MachineFunction. Ask the target if it's suitable for outlining.
    1372             :     // If it isn't, then move on to the next Function in the module.
    1373          56 :     if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
    1374           8 :       continue;
    1375             : 
    1376             :     // We have a function suitable for outlining. Iterate over every
    1377             :     // MachineBasicBlock in MF and try to map its instructions to a list of
    1378             :     // unsigned integers.
    1379         110 :     for (MachineBasicBlock &MBB : *MF) {
    1380             :       // If there isn't anything in MBB, then there's no point in outlining from
    1381             :       // it.
    1382          62 :       if (MBB.empty())
    1383           0 :         continue;
    1384             : 
    1385             :       // Check if MBB could be the target of an indirect branch. If it is, then
    1386             :       // we don't want to outline from it.
    1387          62 :       if (MBB.hasAddressTaken())
    1388           0 :         continue;
    1389             : 
    1390             :       // MBB is suitable for outlining. Map it to a list of unsigneds.
    1391          62 :       Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
    1392             :     }
    1393             :   }
    1394             : 
    1395             :   // Construct a suffix tree, use it to find candidates, and then outline them.
    1396          36 :   SuffixTree ST(Mapper.UnsignedVec);
    1397          18 :   std::vector<std::shared_ptr<Candidate>> CandidateList;
    1398          18 :   std::vector<OutlinedFunction> FunctionList;
    1399             : 
    1400             :   // Find all of the outlining candidates.
    1401             :   unsigned MaxCandidateLen =
    1402          18 :       buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
    1403             : 
    1404             :   // Remove candidates that overlap with other candidates.
    1405          18 :   pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
    1406             : 
    1407             :   // Outline each of the candidates and return true if something was outlined.
    1408          18 :   bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
    1409             : 
    1410             :   return OutlinedSomething;
    1411      303507 : }

Generated by: LCOV version 1.13