LLVM  7.0.0svn
MachineOutliner.cpp
Go to the documentation of this file.
1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Replaces repeated sequences of instructions with function calls.
12 ///
13 /// This works by placing every instruction from every basic block in a
14 /// suffix tree, and repeatedly querying that tree for repeated sequences of
15 /// instructions. If a sequence of instructions appears often, then it ought
16 /// to be beneficial to pull out into a function.
17 ///
18 /// The MachineOutliner communicates with a given target using hooks defined in
19 /// TargetInstrInfo.h. The target supplies the outliner with information on how
20 /// a specific sequence of instructions should be outlined. This information
21 /// is used to deduce the number of instructions necessary to
22 ///
23 /// * Create an outlined function
24 /// * Call that outlined function
25 ///
26 /// Targets must implement
27 /// * getOutliningCandidateInfo
28 /// * buildOutlinedFrame
29 /// * insertOutlinedCall
30 /// * isFunctionSafeToOutlineFrom
31 ///
32 /// in order to make use of the MachineOutliner.
33 ///
34 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
35 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
36 /// how this pass works, the talk is available on YouTube at
37 ///
38 /// https://www.youtube.com/watch?v=yorld-WSOeU
39 ///
40 /// The slides for the talk are available at
41 ///
42 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
43 ///
44 /// The talk provides an overview of how the outliner finds candidates and
45 /// ultimately outlines them. It describes how the main data structure for this
46 /// pass, the suffix tree, is queried and purged for candidates. It also gives
47 /// a simplified suffix tree construction algorithm for suffix trees based off
48 /// of the algorithm actually used here, Ukkonen's algorithm.
49 ///
50 /// For the original RFC for this pass, please see
51 ///
52 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
53 ///
54 /// For more information on the suffix tree data structure, please see
55 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
56 ///
57 //===----------------------------------------------------------------------===//
59 #include "llvm/ADT/DenseMap.h"
60 #include "llvm/ADT/Statistic.h"
61 #include "llvm/ADT/Twine.h"
66 #include "llvm/CodeGen/Passes.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/Mangler.h"
72 #include "llvm/Support/Allocator.h"
74 #include "llvm/Support/Debug.h"
76 #include <functional>
77 #include <map>
78 #include <sstream>
79 #include <tuple>
80 #include <vector>
81 
82 #define DEBUG_TYPE "machine-outliner"
83 
84 using namespace llvm;
85 using namespace ore;
86 using namespace outliner;
87 
88 STATISTIC(NumOutlined, "Number of candidates outlined");
89 STATISTIC(FunctionsCreated, "Number of functions created");
90 
91 // Set to true if the user wants the outliner to run on linkonceodr linkage
92 // functions. This is false by default because the linker can dedupe linkonceodr
93 // functions. Since the outliner is confined to a single module (modulo LTO),
94 // this is off by default. It should, however, be the default behaviour in
95 // LTO.
97  "enable-linkonceodr-outlining",
98  cl::Hidden,
99  cl::desc("Enable the machine outliner on linkonceodr functions"),
100  cl::init(false));
101 
102 namespace {
103 
104 /// Represents an undefined index in the suffix tree.
105 const unsigned EmptyIdx = -1;
106 
107 /// A node in a suffix tree which represents a substring or suffix.
108 ///
109 /// Each node has either no children or at least two children, with the root
110 /// being a exception in the empty tree.
111 ///
112 /// Children are represented as a map between unsigned integers and nodes. If
113 /// a node N has a child M on unsigned integer k, then the mapping represented
114 /// by N is a proper prefix of the mapping represented by M. Note that this,
115 /// although similar to a trie is somewhat different: each node stores a full
116 /// substring of the full mapping rather than a single character state.
117 ///
118 /// Each internal node contains a pointer to the internal node representing
119 /// the same string, but with the first character chopped off. This is stored
120 /// in \p Link. Each leaf node stores the start index of its respective
121 /// suffix in \p SuffixIdx.
122 struct SuffixTreeNode {
123 
124  /// The children of this node.
125  ///
126  /// A child existing on an unsigned integer implies that from the mapping
127  /// represented by the current node, there is a way to reach another
128  /// mapping by tacking that character on the end of the current string.
130 
131  /// A flag set to false if the node has been pruned from the tree.
132  bool IsInTree = true;
133 
134  /// The start index of this node's substring in the main string.
135  unsigned StartIdx = EmptyIdx;
136 
137  /// The end index of this node's substring in the main string.
138  ///
139  /// Every leaf node must have its \p EndIdx incremented at the end of every
140  /// step in the construction algorithm. To avoid having to update O(N)
141  /// nodes individually at the end of every step, the end index is stored
142  /// as a pointer.
143  unsigned *EndIdx = nullptr;
144 
145  /// For leaves, the start index of the suffix represented by this node.
146  ///
147  /// For all other nodes, this is ignored.
148  unsigned SuffixIdx = EmptyIdx;
149 
150  /// For internal nodes, a pointer to the internal node representing
151  /// the same sequence with the first character chopped off.
152  ///
153  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
154  /// Ukkonen's algorithm does to achieve linear-time construction is
155  /// keep track of which node the next insert should be at. This makes each
156  /// insert O(1), and there are a total of O(N) inserts. The suffix link
157  /// helps with inserting children of internal nodes.
158  ///
159  /// Say we add a child to an internal node with associated mapping S. The
160  /// next insertion must be at the node representing S - its first character.
161  /// This is given by the way that we iteratively build the tree in Ukkonen's
162  /// algorithm. The main idea is to look at the suffixes of each prefix in the
163  /// string, starting with the longest suffix of the prefix, and ending with
164  /// the shortest. Therefore, if we keep pointers between such nodes, we can
165  /// move to the next insertion point in O(1) time. If we don't, then we'd
166  /// have to query from the root, which takes O(N) time. This would make the
167  /// construction algorithm O(N^2) rather than O(N).
168  SuffixTreeNode *Link = nullptr;
169 
170  /// The parent of this node. Every node except for the root has a parent.
171  SuffixTreeNode *Parent = nullptr;
172 
173  /// The number of times this node's string appears in the tree.
174  ///
175  /// This is equal to the number of leaf children of the string. It represents
176  /// the number of suffixes that the node's string is a prefix of.
177  unsigned OccurrenceCount = 0;
178 
179  /// The length of the string formed by concatenating the edge labels from the
180  /// root to this node.
181  unsigned ConcatLen = 0;
182 
183  /// Returns true if this node is a leaf.
184  bool isLeaf() const { return SuffixIdx != EmptyIdx; }
185 
186  /// Returns true if this node is the root of its owning \p SuffixTree.
187  bool isRoot() const { return StartIdx == EmptyIdx; }
188 
189  /// Return the number of elements in the substring associated with this node.
190  size_t size() const {
191 
192  // Is it the root? If so, it's the empty string so return 0.
193  if (isRoot())
194  return 0;
195 
196  assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
197 
198  // Size = the number of elements in the string.
199  // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
200  return *EndIdx - StartIdx + 1;
201  }
202 
203  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
204  SuffixTreeNode *Parent)
205  : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
206 
207  SuffixTreeNode() {}
208 };
209 
210 /// A data structure for fast substring queries.
211 ///
212 /// Suffix trees represent the suffixes of their input strings in their leaves.
213 /// A suffix tree is a type of compressed trie structure where each node
214 /// represents an entire substring rather than a single character. Each leaf
215 /// of the tree is a suffix.
216 ///
217 /// A suffix tree can be seen as a type of state machine where each state is a
218 /// substring of the full string. The tree is structured so that, for a string
219 /// of length N, there are exactly N leaves in the tree. This structure allows
220 /// us to quickly find repeated substrings of the input string.
221 ///
222 /// In this implementation, a "string" is a vector of unsigned integers.
223 /// These integers may result from hashing some data type. A suffix tree can
224 /// contain 1 or many strings, which can then be queried as one large string.
225 ///
226 /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
227 /// suffix tree construction. Ukkonen's algorithm is explained in more detail
228 /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
229 /// paper is available at
230 ///
231 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
232 class SuffixTree {
233 public:
234  /// Stores each leaf node in the tree.
235  ///
236  /// This is used for finding outlining candidates.
237  std::vector<SuffixTreeNode *> LeafVector;
238 
239  /// Each element is an integer representing an instruction in the module.
240  ArrayRef<unsigned> Str;
241 
242 private:
243  /// Maintains each node in the tree.
245 
246  /// The root of the suffix tree.
247  ///
248  /// The root represents the empty string. It is maintained by the
249  /// \p NodeAllocator like every other node in the tree.
250  SuffixTreeNode *Root = nullptr;
251 
252  /// Maintains the end indices of the internal nodes in the tree.
253  ///
254  /// Each internal node is guaranteed to never have its end index change
255  /// during the construction algorithm; however, leaves must be updated at
256  /// every step. Therefore, we need to store leaf end indices by reference
257  /// to avoid updating O(N) leaves at every step of construction. Thus,
258  /// every internal node must be allocated its own end index.
259  BumpPtrAllocator InternalEndIdxAllocator;
260 
261  /// The end index of each leaf in the tree.
262  unsigned LeafEndIdx = -1;
263 
264  /// Helper struct which keeps track of the next insertion point in
265  /// Ukkonen's algorithm.
266  struct ActiveState {
267  /// The next node to insert at.
268  SuffixTreeNode *Node;
269 
270  /// The index of the first character in the substring currently being added.
271  unsigned Idx = EmptyIdx;
272 
273  /// The length of the substring we have to add at the current step.
274  unsigned Len = 0;
275  };
276 
277  /// The point the next insertion will take place at in the
278  /// construction algorithm.
279  ActiveState Active;
280 
281  /// Allocate a leaf node and add it to the tree.
282  ///
283  /// \param Parent The parent of this node.
284  /// \param StartIdx The start index of this node's associated string.
285  /// \param Edge The label on the edge leaving \p Parent to this node.
286  ///
287  /// \returns A pointer to the allocated leaf node.
288  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
289  unsigned Edge) {
290 
291  assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
292 
293  SuffixTreeNode *N = new (NodeAllocator.Allocate())
294  SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
295  Parent.Children[Edge] = N;
296 
297  return N;
298  }
299 
300  /// Allocate an internal node and add it to the tree.
301  ///
302  /// \param Parent The parent of this node. Only null when allocating the root.
303  /// \param StartIdx The start index of this node's associated string.
304  /// \param EndIdx The end index of this node's associated string.
305  /// \param Edge The label on the edge leaving \p Parent to this node.
306  ///
307  /// \returns A pointer to the allocated internal node.
308  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
309  unsigned EndIdx, unsigned Edge) {
310 
311  assert(StartIdx <= EndIdx && "String can't start after it ends!");
312  assert(!(!Parent && StartIdx != EmptyIdx) &&
313  "Non-root internal nodes must have parents!");
314 
315  unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
316  SuffixTreeNode *N = new (NodeAllocator.Allocate())
317  SuffixTreeNode(StartIdx, E, Root, Parent);
318  if (Parent)
319  Parent->Children[Edge] = N;
320 
321  return N;
322  }
323 
324  /// Set the suffix indices of the leaves to the start indices of their
325  /// respective suffixes. Also stores each leaf in \p LeafVector at its
326  /// respective suffix index.
327  ///
328  /// \param[in] CurrNode The node currently being visited.
329  /// \param CurrIdx The current index of the string being visited.
330  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
331 
332  bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
333 
334  // Store the length of the concatenation of all strings from the root to
335  // this node.
336  if (!CurrNode.isRoot()) {
337  if (CurrNode.ConcatLen == 0)
338  CurrNode.ConcatLen = CurrNode.size();
339 
340  if (CurrNode.Parent)
341  CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
342  }
343 
344  // Traverse the tree depth-first.
345  for (auto &ChildPair : CurrNode.Children) {
346  assert(ChildPair.second && "Node had a null child!");
347  setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
348  }
349 
350  // Is this node a leaf?
351  if (IsLeaf) {
352  // If yes, give it a suffix index and bump its parent's occurrence count.
353  CurrNode.SuffixIdx = Str.size() - CurrIdx;
354  assert(CurrNode.Parent && "CurrNode had no parent!");
355  CurrNode.Parent->OccurrenceCount++;
356 
357  // Store the leaf in the leaf vector for pruning later.
358  LeafVector[CurrNode.SuffixIdx] = &CurrNode;
359  }
360  }
361 
362  /// Construct the suffix tree for the prefix of the input ending at
363  /// \p EndIdx.
364  ///
365  /// Used to construct the full suffix tree iteratively. At the end of each
366  /// step, the constructed suffix tree is either a valid suffix tree, or a
367  /// suffix tree with implicit suffixes. At the end of the final step, the
368  /// suffix tree is a valid tree.
369  ///
370  /// \param EndIdx The end index of the current prefix in the main string.
371  /// \param SuffixesToAdd The number of suffixes that must be added
372  /// to complete the suffix tree at the current phase.
373  ///
374  /// \returns The number of suffixes that have not been added at the end of
375  /// this step.
376  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
377  SuffixTreeNode *NeedsLink = nullptr;
378 
379  while (SuffixesToAdd > 0) {
380 
381  // Are we waiting to add anything other than just the last character?
382  if (Active.Len == 0) {
383  // If not, then say the active index is the end index.
384  Active.Idx = EndIdx;
385  }
386 
387  assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
388 
389  // The first character in the current substring we're looking at.
390  unsigned FirstChar = Str[Active.Idx];
391 
392  // Have we inserted anything starting with FirstChar at the current node?
393  if (Active.Node->Children.count(FirstChar) == 0) {
394  // If not, then we can just insert a leaf and move too the next step.
395  insertLeaf(*Active.Node, EndIdx, FirstChar);
396 
397  // The active node is an internal node, and we visited it, so it must
398  // need a link if it doesn't have one.
399  if (NeedsLink) {
400  NeedsLink->Link = Active.Node;
401  NeedsLink = nullptr;
402  }
403  } else {
404  // There's a match with FirstChar, so look for the point in the tree to
405  // insert a new node.
406  SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
407 
408  unsigned SubstringLen = NextNode->size();
409 
410  // Is the current suffix we're trying to insert longer than the size of
411  // the child we want to move to?
412  if (Active.Len >= SubstringLen) {
413  // If yes, then consume the characters we've seen and move to the next
414  // node.
415  Active.Idx += SubstringLen;
416  Active.Len -= SubstringLen;
417  Active.Node = NextNode;
418  continue;
419  }
420 
421  // Otherwise, the suffix we're trying to insert must be contained in the
422  // next node we want to move to.
423  unsigned LastChar = Str[EndIdx];
424 
425  // Is the string we're trying to insert a substring of the next node?
426  if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
427  // If yes, then we're done for this step. Remember our insertion point
428  // and move to the next end index. At this point, we have an implicit
429  // suffix tree.
430  if (NeedsLink && !Active.Node->isRoot()) {
431  NeedsLink->Link = Active.Node;
432  NeedsLink = nullptr;
433  }
434 
435  Active.Len++;
436  break;
437  }
438 
439  // The string we're trying to insert isn't a substring of the next node,
440  // but matches up to a point. Split the node.
441  //
442  // For example, say we ended our search at a node n and we're trying to
443  // insert ABD. Then we'll create a new node s for AB, reduce n to just
444  // representing C, and insert a new leaf node l to represent d. This
445  // allows us to ensure that if n was a leaf, it remains a leaf.
446  //
447  // | ABC ---split---> | AB
448  // n s
449  // C / \ D
450  // n l
451 
452  // The node s from the diagram
453  SuffixTreeNode *SplitNode =
454  insertInternalNode(Active.Node, NextNode->StartIdx,
455  NextNode->StartIdx + Active.Len - 1, FirstChar);
456 
457  // Insert the new node representing the new substring into the tree as
458  // a child of the split node. This is the node l from the diagram.
459  insertLeaf(*SplitNode, EndIdx, LastChar);
460 
461  // Make the old node a child of the split node and update its start
462  // index. This is the node n from the diagram.
463  NextNode->StartIdx += Active.Len;
464  NextNode->Parent = SplitNode;
465  SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
466 
467  // SplitNode is an internal node, update the suffix link.
468  if (NeedsLink)
469  NeedsLink->Link = SplitNode;
470 
471  NeedsLink = SplitNode;
472  }
473 
474  // We've added something new to the tree, so there's one less suffix to
475  // add.
476  SuffixesToAdd--;
477 
478  if (Active.Node->isRoot()) {
479  if (Active.Len > 0) {
480  Active.Len--;
481  Active.Idx = EndIdx - SuffixesToAdd + 1;
482  }
483  } else {
484  // Start the next phase at the next smallest suffix.
485  Active.Node = Active.Node->Link;
486  }
487  }
488 
489  return SuffixesToAdd;
490  }
491 
492 public:
493  /// Construct a suffix tree from a sequence of unsigned integers.
494  ///
495  /// \param Str The string to construct the suffix tree for.
496  SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
497  Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
498  Root->IsInTree = true;
499  Active.Node = Root;
500  LeafVector = std::vector<SuffixTreeNode *>(Str.size());
501 
502  // Keep track of the number of suffixes we have to add of the current
503  // prefix.
504  unsigned SuffixesToAdd = 0;
505  Active.Node = Root;
506 
507  // Construct the suffix tree iteratively on each prefix of the string.
508  // PfxEndIdx is the end index of the current prefix.
509  // End is one past the last element in the string.
510  for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
511  PfxEndIdx++) {
512  SuffixesToAdd++;
513  LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
514  SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
515  }
516 
517  // Set the suffix indices of each leaf.
518  assert(Root && "Root node can't be nullptr!");
519  setSuffixIndices(*Root, 0);
520  }
521 };
522 
523 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
524 struct InstructionMapper {
525 
526  /// The next available integer to assign to a \p MachineInstr that
527  /// cannot be outlined.
528  ///
529  /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
530  unsigned IllegalInstrNumber = -3;
531 
532  /// The next available integer to assign to a \p MachineInstr that can
533  /// be outlined.
534  unsigned LegalInstrNumber = 0;
535 
536  /// Correspondence from \p MachineInstrs to unsigned integers.
538  InstructionIntegerMap;
539 
540  /// Corresponcence from unsigned integers to \p MachineInstrs.
541  /// Inverse of \p InstructionIntegerMap.
542  DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
543 
544  /// The vector of unsigned integers that the module is mapped to.
545  std::vector<unsigned> UnsignedVec;
546 
547  /// Stores the location of the instruction associated with the integer
548  /// at index i in \p UnsignedVec for each index i.
549  std::vector<MachineBasicBlock::iterator> InstrList;
550 
551  /// Maps \p *It to a legal integer.
552  ///
553  /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
554  /// \p IntegerInstructionMap, and \p LegalInstrNumber.
555  ///
556  /// \returns The integer that \p *It was mapped to.
557  unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
558 
559  // Get the integer for this instruction or give it the current
560  // LegalInstrNumber.
561  InstrList.push_back(It);
562  MachineInstr &MI = *It;
563  bool WasInserted;
565  ResultIt;
566  std::tie(ResultIt, WasInserted) =
567  InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
568  unsigned MINumber = ResultIt->second;
569 
570  // There was an insertion.
571  if (WasInserted) {
572  LegalInstrNumber++;
573  IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
574  }
575 
576  UnsignedVec.push_back(MINumber);
577 
578  // Make sure we don't overflow or use any integers reserved by the DenseMap.
579  if (LegalInstrNumber >= IllegalInstrNumber)
580  report_fatal_error("Instruction mapping overflow!");
581 
582  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
583  "Tried to assign DenseMap tombstone or empty key to instruction.");
584  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
585  "Tried to assign DenseMap tombstone or empty key to instruction.");
586 
587  return MINumber;
588  }
589 
590  /// Maps \p *It to an illegal integer.
591  ///
592  /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
593  ///
594  /// \returns The integer that \p *It was mapped to.
595  unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
596  unsigned MINumber = IllegalInstrNumber;
597 
598  InstrList.push_back(It);
599  UnsignedVec.push_back(IllegalInstrNumber);
600  IllegalInstrNumber--;
601 
602  assert(LegalInstrNumber < IllegalInstrNumber &&
603  "Instruction mapping overflow!");
604 
605  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
606  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
607 
608  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
609  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
610 
611  return MINumber;
612  }
613 
614  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
615  /// and appends it to \p UnsignedVec and \p InstrList.
616  ///
617  /// Two instructions are assigned the same integer if they are identical.
618  /// If an instruction is deemed unsafe to outline, then it will be assigned an
619  /// unique integer. The resulting mapping is placed into a suffix tree and
620  /// queried for candidates.
621  ///
622  /// \param MBB The \p MachineBasicBlock to be translated into integers.
623  /// \param TRI \p TargetRegisterInfo for the module.
624  /// \param TII \p TargetInstrInfo for the module.
625  void convertToUnsignedVec(MachineBasicBlock &MBB,
626  const TargetRegisterInfo &TRI,
627  const TargetInstrInfo &TII) {
628  unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
629 
630  for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
631  It++) {
632 
633  // Keep track of where this instruction is in the module.
634  switch (TII.getOutliningType(It, Flags)) {
635  case InstrType::Illegal:
636  mapToIllegalUnsigned(It);
637  break;
638 
639  case InstrType::Legal:
640  mapToLegalUnsigned(It);
641  break;
642 
644  mapToLegalUnsigned(It);
645  InstrList.push_back(It);
646  UnsignedVec.push_back(IllegalInstrNumber);
647  IllegalInstrNumber--;
648  break;
649 
651  break;
652  }
653  }
654 
655  // After we're done every insertion, uniquely terminate this part of the
656  // "string". This makes sure we won't match across basic block or function
657  // boundaries since the "end" is encoded uniquely and thus appears in no
658  // repeated substring.
659  InstrList.push_back(MBB.end());
660  UnsignedVec.push_back(IllegalInstrNumber);
661  IllegalInstrNumber--;
662  }
663 
664  InstructionMapper() {
665  // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
666  // changed.
667  assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
668  "DenseMapInfo<unsigned>'s empty key isn't -1!");
670  "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
671  }
672 };
673 
674 /// An interprocedural pass which finds repeated sequences of
675 /// instructions and replaces them with calls to functions.
676 ///
677 /// Each instruction is mapped to an unsigned integer and placed in a string.
678 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
679 /// is then repeatedly queried for repeated sequences of instructions. Each
680 /// non-overlapping repeated sequence is then placed in its own
681 /// \p MachineFunction and each instance is then replaced with a call to that
682 /// function.
683 struct MachineOutliner : public ModulePass {
684 
685  static char ID;
686 
687  /// Set to true if the outliner should consider functions with
688  /// linkonceodr linkage.
689  bool OutlineFromLinkOnceODRs = false;
690 
691  /// Set to true if the outliner should run on all functions in the module
692  /// considered safe for outlining.
693  /// Set to true by default for compatibility with llc's -run-pass option.
694  /// Set when the pass is constructed in TargetPassConfig.
695  bool RunOnAllFunctions = true;
696 
697  // Collection of IR functions created by the outliner.
698  std::vector<Function *> CreatedIRFunctions;
699 
700  StringRef getPassName() const override { return "Machine Outliner"; }
701 
702  void getAnalysisUsage(AnalysisUsage &AU) const override {
705  AU.setPreservesAll();
707  }
708 
709  MachineOutliner() : ModulePass(ID) {
711  }
712 
713  /// Find all repeated substrings that satisfy the outlining cost model.
714  ///
715  /// If a substring appears at least twice, then it must be represented by
716  /// an internal node which appears in at least two suffixes. Each suffix is
717  /// represented by a leaf node. To do this, we visit each internal node in
718  /// the tree, using the leaf children of each internal node. If an internal
719  /// node represents a beneficial substring, then we use each of its leaf
720  /// children to find the locations of its substring.
721  ///
722  /// \param ST A suffix tree to query.
723  /// \param TII TargetInstrInfo for the target.
724  /// \param Mapper Contains outlining mapping information.
725  /// \param[out] CandidateList Filled with candidates representing each
726  /// beneficial substring.
727  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
728  /// type of candidate.
729  ///
730  /// \returns The length of the longest candidate found.
731  unsigned
732  findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
733  InstructionMapper &Mapper,
734  std::vector<std::shared_ptr<Candidate>> &CandidateList,
735  std::vector<OutlinedFunction> &FunctionList);
736 
737  /// Replace the sequences of instructions represented by the
738  /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
739  /// described in \p FunctionList.
740  ///
741  /// \param M The module we are outlining from.
742  /// \param CandidateList A list of candidates to be outlined.
743  /// \param FunctionList A list of functions to be inserted into the module.
744  /// \param Mapper Contains the instruction mappings for the module.
745  bool outline(Module &M,
746  const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
747  std::vector<OutlinedFunction> &FunctionList,
748  InstructionMapper &Mapper);
749 
750  /// Creates a function for \p OF and inserts it into the module.
751  MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
752  InstructionMapper &Mapper);
753 
754  /// Find potential outlining candidates and store them in \p CandidateList.
755  ///
756  /// For each type of potential candidate, also build an \p OutlinedFunction
757  /// struct containing the information to build the function for that
758  /// candidate.
759  ///
760  /// \param[out] CandidateList Filled with outlining candidates for the module.
761  /// \param[out] FunctionList Filled with functions corresponding to each type
762  /// of \p Candidate.
763  /// \param ST The suffix tree for the module.
764  /// \param TII TargetInstrInfo for the module.
765  ///
766  /// \returns The length of the longest candidate found. 0 if there are none.
767  unsigned
768  buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
769  std::vector<OutlinedFunction> &FunctionList,
770  SuffixTree &ST, InstructionMapper &Mapper,
771  const TargetInstrInfo &TII);
772 
773  /// Helper function for pruneOverlaps.
774  /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
775  void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
776 
777  /// Remove any overlapping candidates that weren't handled by the
778  /// suffix tree's pruning method.
779  ///
780  /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
781  /// If a short candidate is chosen for outlining, then a longer candidate
782  /// which has that short candidate as a suffix is chosen, the tree's pruning
783  /// method will not find it. Thus, we need to prune before outlining as well.
784  ///
785  /// \param[in,out] CandidateList A list of outlining candidates.
786  /// \param[in,out] FunctionList A list of functions to be outlined.
787  /// \param Mapper Contains instruction mapping info for outlining.
788  /// \param MaxCandidateLen The length of the longest candidate.
789  /// \param TII TargetInstrInfo for the module.
790  void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
791  std::vector<OutlinedFunction> &FunctionList,
792  InstructionMapper &Mapper, unsigned MaxCandidateLen,
793  const TargetInstrInfo &TII);
794 
795  /// Construct a suffix tree on the instructions in \p M and outline repeated
796  /// strings from that tree.
797  bool runOnModule(Module &M) override;
798 
799  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
800  /// function for remark emission.
801  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
802  DISubprogram *SP;
803  for (const std::shared_ptr<Candidate> &C : OF.Candidates)
804  if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
805  return SP;
806  return nullptr;
807  }
808 };
809 
810 } // Anonymous namespace.
811 
812 char MachineOutliner::ID = 0;
813 
814 namespace llvm {
815 ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
816  MachineOutliner *OL = new MachineOutliner();
817  OL->RunOnAllFunctions = RunOnAllFunctions;
818  return OL;
819 }
820 
821 } // namespace llvm
822 
823 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
824  false)
825 
826 unsigned MachineOutliner::findCandidates(
827  SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
828  std::vector<std::shared_ptr<Candidate>> &CandidateList,
829  std::vector<OutlinedFunction> &FunctionList) {
830  CandidateList.clear();
831  FunctionList.clear();
832  unsigned MaxLen = 0;
833 
834  // FIXME: Visit internal nodes instead of leaves.
835  for (SuffixTreeNode *Leaf : ST.LeafVector) {
836  assert(Leaf && "Leaves in LeafVector cannot be null!");
837  if (!Leaf->IsInTree)
838  continue;
839 
840  assert(Leaf->Parent && "All leaves must have parents!");
841  SuffixTreeNode &Parent = *(Leaf->Parent);
842 
843  // If it doesn't appear enough, or we already outlined from it, skip it.
844  if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
845  continue;
846 
847  // Figure out if this candidate is beneficial.
848  unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
849 
850  // Too short to be beneficial; skip it.
851  // FIXME: This isn't necessarily true for, say, X86. If we factor in
852  // instruction lengths we need more information than this.
853  if (StringLen < 2)
854  continue;
855 
856  // If this is a beneficial class of candidate, then every one is stored in
857  // this vector.
858  std::vector<Candidate> CandidatesForRepeatedSeq;
859 
860  // Figure out the call overhead for each instance of the sequence.
861  for (auto &ChildPair : Parent.Children) {
862  SuffixTreeNode *M = ChildPair.second;
863 
864  if (M && M->IsInTree && M->isLeaf()) {
865  // Never visit this leaf again.
866  M->IsInTree = false;
867  unsigned StartIdx = M->SuffixIdx;
868  unsigned EndIdx = StartIdx + StringLen - 1;
869 
870  // Trick: Discard some candidates that would be incompatible with the
871  // ones we've already found for this sequence. This will save us some
872  // work in candidate selection.
873  //
874  // If two candidates overlap, then we can't outline them both. This
875  // happens when we have candidates that look like, say
876  //
877  // AA (where each "A" is an instruction).
878  //
879  // We might have some portion of the module that looks like this:
880  // AAAAAA (6 A's)
881  //
882  // In this case, there are 5 different copies of "AA" in this range, but
883  // at most 3 can be outlined. If only outlining 3 of these is going to
884  // be unbeneficial, then we ought to not bother.
885  //
886  // Note that two things DON'T overlap when they look like this:
887  // start1...end1 .... start2...end2
888  // That is, one must either
889  // * End before the other starts
890  // * Start after the other ends
891  if (std::all_of(CandidatesForRepeatedSeq.begin(),
892  CandidatesForRepeatedSeq.end(),
893  [&StartIdx, &EndIdx](const Candidate &C) {
894  return (EndIdx < C.getStartIdx() ||
895  StartIdx > C.getEndIdx());
896  })) {
897  // It doesn't overlap with anything, so we can outline it.
898  // Each sequence is over [StartIt, EndIt].
899  // Save the candidate and its location.
900 
901  MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
902  MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
903 
904  CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
905  EndIt, StartIt->getParent(),
906  FunctionList.size());
907  }
908  }
909  }
910 
911  // We've found something we might want to outline.
912  // Create an OutlinedFunction to store it and check if it'd be beneficial
913  // to outline.
914  TargetCostInfo TCI =
915  TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq);
916  std::vector<unsigned> Seq;
917  for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
918  Seq.push_back(ST.Str[i]);
919  OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
920  Seq, TCI);
921  unsigned Benefit = OF.getBenefit();
922 
923  // Is it better to outline this candidate than not?
924  if (Benefit < 1) {
925  // Outlining this candidate would take more instructions than not
926  // outlining.
927  // Emit a remark explaining why we didn't outline this candidate.
928  Candidate &C = CandidatesForRepeatedSeq.front();
929  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
930  MORE.emit([&]() {
931  MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
932  C.front()->getDebugLoc(), C.getMBB());
933  R << "Did not outline " << NV("Length", StringLen) << " instructions"
934  << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
935  << " locations."
936  << " Bytes from outlining all occurrences ("
937  << NV("OutliningCost", OF.getOutliningCost()) << ")"
938  << " >= Unoutlined instruction bytes ("
939  << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
940  << " (Also found at: ";
941 
942  // Tell the user the other places the candidate was found.
943  for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
944  R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
945  CandidatesForRepeatedSeq[i].front()->getDebugLoc());
946  if (i != e - 1)
947  R << ", ";
948  }
949 
950  R << ")";
951  return R;
952  });
953 
954  // Move to the next candidate.
955  continue;
956  }
957 
958  if (StringLen > MaxLen)
959  MaxLen = StringLen;
960 
961  // At this point, the candidate class is seen as beneficial. Set their
962  // benefit values and save them in the candidate list.
963  std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
964  for (Candidate &C : CandidatesForRepeatedSeq) {
965  C.Benefit = Benefit;
966  C.TCI = TCI;
967  std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
968  CandidateList.push_back(Cptr);
969  CandidatesForFn.push_back(Cptr);
970  }
971 
972  FunctionList.push_back(OF);
973  FunctionList.back().Candidates = CandidatesForFn;
974 
975  // Move to the next function.
976  Parent.IsInTree = false;
977  }
978 
979  return MaxLen;
980 }
981 
982 // Remove C from the candidate space, and update its OutlinedFunction.
983 void MachineOutliner::prune(Candidate &C,
984  std::vector<OutlinedFunction> &FunctionList) {
985  // Get the OutlinedFunction associated with this Candidate.
986  OutlinedFunction &F = FunctionList[C.FunctionIdx];
987 
988  // Update C's associated function's occurrence count.
989  F.decrement();
990 
991  // Remove C from the CandidateList.
992  C.InCandidateList = false;
993 
994  LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
995  dbgs() << "--- Num fns left for candidate: "
996  << F.getOccurrenceCount() << "\n";
997  dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
998  << "\n";);
999 }
1000 
1001 void MachineOutliner::pruneOverlaps(
1002  std::vector<std::shared_ptr<Candidate>> &CandidateList,
1003  std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
1004  unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
1005 
1006  // Return true if this candidate became unbeneficial for outlining in a
1007  // previous step.
1008  auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
1009 
1010  // Check if the candidate was removed in a previous step.
1011  if (!C.InCandidateList)
1012  return true;
1013 
1014  // C must be alive. Check if we should remove it.
1015  if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
1016  prune(C, FunctionList);
1017  return true;
1018  }
1019 
1020  // C is in the list, and F is still beneficial.
1021  return false;
1022  };
1023 
1024  // TODO: Experiment with interval trees or other interval-checking structures
1025  // to lower the time complexity of this function.
1026  // TODO: Can we do better than the simple greedy choice?
1027  // Check for overlaps in the range.
1028  // This is O(MaxCandidateLen * CandidateList.size()).
1029  for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
1030  It++) {
1031  Candidate &C1 = **It;
1032 
1033  // If C1 was already pruned, or its function is no longer beneficial for
1034  // outlining, move to the next candidate.
1035  if (ShouldSkipCandidate(C1))
1036  continue;
1037 
1038  // The minimum start index of any candidate that could overlap with this
1039  // one.
1040  unsigned FarthestPossibleIdx = 0;
1041 
1042  // Either the index is 0, or it's at most MaxCandidateLen indices away.
1043  if (C1.getStartIdx() > MaxCandidateLen)
1044  FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
1045 
1046  // Compare against the candidates in the list that start at most
1047  // FarthestPossibleIdx indices away from C1. There are at most
1048  // MaxCandidateLen of these.
1049  for (auto Sit = It + 1; Sit != Et; Sit++) {
1050  Candidate &C2 = **Sit;
1051 
1052  // Is this candidate too far away to overlap?
1053  if (C2.getStartIdx() < FarthestPossibleIdx)
1054  break;
1055 
1056  // If C2 was already pruned, or its function is no longer beneficial for
1057  // outlining, move to the next candidate.
1058  if (ShouldSkipCandidate(C2))
1059  continue;
1060 
1061  // Do C1 and C2 overlap?
1062  //
1063  // Not overlapping:
1064  // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
1065  //
1066  // We sorted our candidate list so C2Start <= C1Start. We know that
1067  // C2End > C2Start since each candidate has length >= 2. Therefore, all we
1068  // have to check is C2End < C2Start to see if we overlap.
1069  if (C2.getEndIdx() < C1.getStartIdx())
1070  continue;
1071 
1072  // C1 and C2 overlap.
1073  // We need to choose the better of the two.
1074  //
1075  // Approximate this by picking the one which would have saved us the
1076  // most instructions before any pruning.
1077 
1078  // Is C2 a better candidate?
1079  if (C2.Benefit > C1.Benefit) {
1080  // Yes, so prune C1. Since C1 is dead, we don't have to compare it
1081  // against anything anymore, so break.
1082  prune(C1, FunctionList);
1083  break;
1084  }
1085 
1086  // Prune C2 and move on to the next candidate.
1087  prune(C2, FunctionList);
1088  }
1089  }
1090 }
1091 
1092 unsigned MachineOutliner::buildCandidateList(
1093  std::vector<std::shared_ptr<Candidate>> &CandidateList,
1094  std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
1095  InstructionMapper &Mapper, const TargetInstrInfo &TII) {
1096 
1097  std::vector<unsigned> CandidateSequence; // Current outlining candidate.
1098  unsigned MaxCandidateLen = 0; // Length of the longest candidate.
1099 
1100  MaxCandidateLen =
1101  findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
1102 
1103  // Sort the candidates in decending order. This will simplify the outlining
1104  // process when we have to remove the candidates from the mapping by
1105  // allowing us to cut them out without keeping track of an offset.
1106  std::stable_sort(
1107  CandidateList.begin(), CandidateList.end(),
1108  [](const std::shared_ptr<Candidate> &LHS,
1109  const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
1110 
1111  return MaxCandidateLen;
1112 }
1113 
1115 MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
1116  InstructionMapper &Mapper) {
1117 
1118  // Create the function name. This should be unique. For now, just hash the
1119  // module name and include it in the function name plus the number of this
1120  // function.
1121  std::ostringstream NameStream;
1122  NameStream << "OUTLINED_FUNCTION_" << OF.Name;
1123 
1124  // Create the function using an IR-level function.
1125  LLVMContext &C = M.getContext();
1127  M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
1128  assert(F && "Function was null!");
1129 
1130  // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1131  // which gives us better results when we outline from linkonceodr functions.
1134 
1135  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
1136  // necessary.
1137 
1138  // Set optsize/minsize, so we don't insert padding between outlined
1139  // functions.
1140  F->addFnAttr(Attribute::OptimizeForSize);
1141  F->addFnAttr(Attribute::MinSize);
1142 
1143  // Save F so that we can add debug info later if we need to.
1144  CreatedIRFunctions.push_back(F);
1145 
1146  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1147  IRBuilder<> Builder(EntryBB);
1148  Builder.CreateRetVoid();
1149 
1150  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1152  MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1153  const TargetSubtargetInfo &STI = MF.getSubtarget();
1154  const TargetInstrInfo &TII = *STI.getInstrInfo();
1155 
1156  // Insert the new function into the module.
1157  MF.insert(MF.begin(), &MBB);
1158 
1159  // Copy over the instructions for the function using the integer mappings in
1160  // its sequence.
1161  for (unsigned Str : OF.Sequence) {
1162  MachineInstr *NewMI =
1163  MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
1164  NewMI->dropMemRefs();
1165 
1166  // Don't keep debug information for outlined instructions.
1167  NewMI->setDebugLoc(DebugLoc());
1168  MBB.insert(MBB.end(), NewMI);
1169  }
1170 
1171  TII.buildOutlinedFrame(MBB, MF, OF.TCI);
1172 
1173  // If there's a DISubprogram associated with this outlined function, then
1174  // emit debug info for the outlined function.
1175  if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1176  // We have a DISubprogram. Get its DICompileUnit.
1177  DICompileUnit *CU = SP->getUnit();
1178  DIBuilder DB(M, true, CU);
1179  DIFile *Unit = SP->getFile();
1180  Mangler Mg;
1181 
1182  // Walk over each IR function we created in the outliner and create
1183  // DISubprograms for each function.
1184  for (Function *F : CreatedIRFunctions) {
1185  // Get the mangled name of the function for the linkage name.
1186  std::string Dummy;
1187  llvm::raw_string_ostream MangledNameStream(Dummy);
1188  Mg.getNameWithPrefix(MangledNameStream, F, false);
1189 
1190  DISubprogram *SP = DB.createFunction(
1191  Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
1192  Unit /* File */,
1193  0 /* Line 0 is reserved for compiler-generated code. */,
1195  DB.getOrCreateTypeArray(None)), /* void type */
1196  false, true, 0, /* Line 0 is reserved for compiler-generated code. */
1197  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1198  true /* Outlined code is optimized code by definition. */);
1199 
1200  // Don't add any new variables to the subprogram.
1201  DB.finalizeSubprogram(SP);
1202 
1203  // Attach subprogram to the function.
1204  F->setSubprogram(SP);
1205  }
1206 
1207  // We're done with the DIBuilder.
1208  DB.finalize();
1209  }
1210 
1211  // Outlined functions shouldn't preserve liveness.
1212  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
1213  MF.getRegInfo().freezeReservedRegs(MF);
1214  return &MF;
1215 }
1216 
1217 bool MachineOutliner::outline(
1218  Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
1219  std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
1220 
1221  bool OutlinedSomething = false;
1222  // Replace the candidates with calls to their respective outlined functions.
1223  for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
1224  Candidate &C = *Cptr;
1225  // Was the candidate removed during pruneOverlaps?
1226  if (!C.InCandidateList)
1227  continue;
1228 
1229  // If not, then look at its OutlinedFunction.
1230  OutlinedFunction &OF = FunctionList[C.FunctionIdx];
1231 
1232  // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
1233  if (OF.getBenefit() < 1)
1234  continue;
1235 
1236  // Does this candidate have a function yet?
1237  if (!OF.MF) {
1238  OF.MF = createOutlinedFunction(M, OF, Mapper);
1239  MachineBasicBlock *MBB = &*OF.MF->begin();
1240 
1241  // Output a remark telling the user that an outlined function was created,
1242  // and explaining where it came from.
1243  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
1244  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
1245  MBB->findDebugLoc(MBB->begin()), MBB);
1246  R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
1247  << " bytes by "
1248  << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
1249  << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
1250  << " locations. "
1251  << "(Found at: ";
1252 
1253  // Tell the user the other places the candidate was found.
1254  for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
1255 
1256  // Skip over things that were pruned.
1257  if (!OF.Candidates[i]->InCandidateList)
1258  continue;
1259 
1260  R << NV(
1261  (Twine("StartLoc") + Twine(i)).str(),
1262  OF.Candidates[i]->front()->getDebugLoc());
1263  if (i != e - 1)
1264  R << ", ";
1265  }
1266 
1267  R << ")";
1268 
1269  MORE.emit(R);
1270  FunctionsCreated++;
1271  }
1272 
1273  MachineFunction *MF = OF.MF;
1274  MachineBasicBlock &MBB = *C.getMBB();
1275  MachineBasicBlock::iterator StartIt = C.front();
1276  MachineBasicBlock::iterator EndIt = C.back();
1277  assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
1278  assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
1279 
1280  const TargetSubtargetInfo &STI = MF->getSubtarget();
1281  const TargetInstrInfo &TII = *STI.getInstrInfo();
1282 
1283  // Insert a call to the new function and erase the old sequence.
1284  auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C.TCI);
1285 
1286  // If the caller tracks liveness, then we need to make sure that anything
1287  // we outline doesn't break liveness assumptions.
1288  // The outlined functions themselves currently don't track liveness, but
1289  // we should make sure that the ranges we yank things out of aren't
1290  // wrong.
1291  if (MBB.getParent()->getProperties().hasProperty(
1293  // Helper lambda for adding implicit def operands to the call instruction.
1294  auto CopyDefs = [&CallInst](MachineInstr &MI) {
1295  for (MachineOperand &MOP : MI.operands()) {
1296  // Skip over anything that isn't a register.
1297  if (!MOP.isReg())
1298  continue;
1299 
1300  // If it's a def, add it to the call instruction.
1301  if (MOP.isDef())
1302  CallInst->addOperand(
1303  MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
1304  true /* isImp = true */));
1305  }
1306  };
1307 
1308  // Copy over the defs in the outlined range.
1309  // First inst in outlined range <-- Anything that's defined in this
1310  // ... .. range has to be added as an implicit
1311  // Last inst in outlined range <-- def to the call instruction.
1312  std::for_each(CallInst, std::next(EndIt), CopyDefs);
1313  }
1314 
1315  // Erase from the point after where the call was inserted up to, and
1316  // including, the final instruction in the sequence.
1317  // Erase needs one past the end, so we need std::next there too.
1318  MBB.erase(std::next(StartIt), std::next(EndIt));
1319  OutlinedSomething = true;
1320 
1321  // Statistics.
1322  NumOutlined++;
1323  }
1324 
1325  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1326 
1327  return OutlinedSomething;
1328 }
1329 
1330 bool MachineOutliner::runOnModule(Module &M) {
1331  // Check if there's anything in the module. If it's empty, then there's
1332  // nothing to outline.
1333  if (M.empty())
1334  return false;
1335 
1336  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1337  const TargetSubtargetInfo &STI =
1338  MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
1339  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
1340  const TargetInstrInfo *TII = STI.getInstrInfo();
1341 
1342  // If the user passed -enable-machine-outliner=always or
1343  // -enable-machine-outliner, the pass will run on all functions in the module.
1344  // Otherwise, if the target supports default outlining, it will run on all
1345  // functions deemed by the target to be worth outlining from by default. Tell
1346  // the user how the outliner is running.
1347  LLVM_DEBUG(
1348  dbgs() << "Machine Outliner: Running on ";
1349  if (RunOnAllFunctions)
1350  dbgs() << "all functions";
1351  else
1352  dbgs() << "target-default functions";
1353  dbgs() << "\n"
1354  );
1355 
1356  // If the user specifies that they want to outline from linkonceodrs, set
1357  // it here.
1358  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1359 
1360  InstructionMapper Mapper;
1361 
1362  // Build instruction mappings for each function in the module. Start by
1363  // iterating over each Function in M.
1364  for (Function &F : M) {
1365 
1366  // If there's nothing in F, then there's no reason to try and outline from
1367  // it.
1368  if (F.empty())
1369  continue;
1370 
1371  // There's something in F. Check if it has a MachineFunction associated with
1372  // it.
1374 
1375  // If it doesn't, then there's nothing to outline from. Move to the next
1376  // Function.
1377  if (!MF)
1378  continue;
1379 
1380  if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
1381  continue;
1382 
1383  // We have a MachineFunction. Ask the target if it's suitable for outlining.
1384  // If it isn't, then move on to the next Function in the module.
1385  if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
1386  continue;
1387 
1388  // We have a function suitable for outlining. Iterate over every
1389  // MachineBasicBlock in MF and try to map its instructions to a list of
1390  // unsigned integers.
1391  for (MachineBasicBlock &MBB : *MF) {
1392  // If there isn't anything in MBB, then there's no point in outlining from
1393  // it.
1394  if (MBB.empty())
1395  continue;
1396 
1397  // Check if MBB could be the target of an indirect branch. If it is, then
1398  // we don't want to outline from it.
1399  if (MBB.hasAddressTaken())
1400  continue;
1401 
1402  // MBB is suitable for outlining. Map it to a list of unsigneds.
1403  Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
1404  }
1405  }
1406 
1407  // Construct a suffix tree, use it to find candidates, and then outline them.
1408  SuffixTree ST(Mapper.UnsignedVec);
1409  std::vector<std::shared_ptr<Candidate>> CandidateList;
1410  std::vector<OutlinedFunction> FunctionList;
1411 
1412  // Find all of the outlining candidates.
1413  unsigned MaxCandidateLen =
1414  buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
1415 
1416  // Remove candidates that overlap with other candidates.
1417  pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
1418 
1419  // Outline each of the candidates and return true if something was outlined.
1420  bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
1421 
1422  return OutlinedSomething;
1423 }
void finalize()
Construct any deferred debug info descriptors.
Definition: DIBuilder.cpp:69
const NoneType None
Definition: None.h:24
uint64_t CallInst * C
DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags=DINode::FlagZero, bool isOptimized=false, DITemplateParameterArray TParams=nullptr, DISubprogram *Decl=nullptr, DITypeArray ThrownTypes=nullptr)
Create a new descriptor for the specified subprogram.
Definition: DIBuilder.cpp:752
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:142
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
const MachineFunctionProperties & getProperties() const
Get the function properties.
DIFile * getFile() const
Diagnostic information for applied optimization remarks.
This class represents a function call, abstracting a target machine&#39;s calling convention.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:908
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
F(f)
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
AnalysisUsage & addRequired()
Definition: BitVector.h:921
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const HexagonInstrInfo * TII
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:242
DISubroutineType * createSubroutineType(DITypeRefArray ParameterTypes, DINode::DIFlags Flags=DINode::FlagZero, unsigned CC=0)
Create subroutine type.
Definition: DIBuilder.cpp:498
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
Subprogram description.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:92
bool empty() const
Definition: Module.h:586
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) unsigned MachineOutliner
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const
Return true if the function can safely be outlined from.
TargetInstrInfo - Interface to description of machine instruction set.
DITypeRefArray getOrCreateTypeArray(ArrayRef< Metadata *> Elements)
Get a DITypeRefArray, create one if required.
Definition: DIBuilder.cpp:612
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const
Return true if the function should be outlined from by default.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
void setSubprogram(DISubprogram *SP)
Set the attached subprogram.
Definition: Metadata.cpp:1504
bool isLeaf(ID id)
Returns true if the intrinsic is a leaf, i.e.
Definition: Function.cpp:990
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const
Returns how or if MI should be outlined.
Contains all data structures shared between the outliner implemented in MachineOutliner.cpp and target implementations of the outliner.
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
Definition: Allocator.h:436
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineFunction & getOrCreateMachineFunction(const Function &F)
Returns the MachineFunction constructed for the IR function F.
ModulePass * createMachineOutlinerPass(bool RunOnAllFunctions=true)
This pass performs outlining on machine instructions directly before printing assembly.
virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::TargetCostInfo &TCI) const
Insert a custom frame for outlined functions.
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:493
void initializeMachineOutlinerPass(PassRegistry &)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1032
void dropMemRefs()
Clear this MachineInstr&#39;s memory reference descriptor list.
The optimization diagnostic interface.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr...
MachineOperand class - Representation of each machine instruction operand.
#define MORE()
Definition: regcomp.c:251
virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::TargetCostInfo &TCI) const
Insert a call to an outlined function into the program.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:387
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:444
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
void setPreservesAll()
Set by analyses that do not transform their input at all.
TargetSubtargetInfo - Generic base class for all target subtargets.
Representation of each machine instruction.
Definition: MachineInstr.h:60
ReturnInst * CreateRetVoid()
Create a &#39;ret void&#39; instruction.
Definition: IRBuilder.h:812
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:215
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
#define N
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
iterator begin()
Definition: Module.h:577
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
void finalizeSubprogram(DISubprogram *SP)
Finalize a specific subprogram - no new variables may be added to this subprogram afterwards...
Definition: DIBuilder.cpp:49
Diagnostic information for missed-optimization remarks.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasProperty(Property P) const
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:477
aarch64 promote const
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable&#39;s name.
Definition: Mangler.cpp:112
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:216
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:901
static cl::opt< bool > EnableLinkOnceODROutlining("enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false))
#define LLVM_DEBUG(X)
Definition: Debug.h:119
virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const
Returns target-defined flags defining properties of the MBB for the outliner.
#define DEBUG_TYPE
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
This class contains meta information specific to a module.