LLVM  9.0.0svn
MachineOutliner.cpp
Go to the documentation of this file.
1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Replaces repeated sequences of instructions with function calls.
11 ///
12 /// This works by placing every instruction from every basic block in a
13 /// suffix tree, and repeatedly querying that tree for repeated sequences of
14 /// instructions. If a sequence of instructions appears often, then it ought
15 /// to be beneficial to pull out into a function.
16 ///
17 /// The MachineOutliner communicates with a given target using hooks defined in
18 /// TargetInstrInfo.h. The target supplies the outliner with information on how
19 /// a specific sequence of instructions should be outlined. This information
20 /// is used to deduce the number of instructions necessary to
21 ///
22 /// * Create an outlined function
23 /// * Call that outlined function
24 ///
25 /// Targets must implement
26 /// * getOutliningCandidateInfo
27 /// * buildOutlinedFrame
28 /// * insertOutlinedCall
29 /// * isFunctionSafeToOutlineFrom
30 ///
31 /// in order to make use of the MachineOutliner.
32 ///
33 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
34 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
35 /// how this pass works, the talk is available on YouTube at
36 ///
37 /// https://www.youtube.com/watch?v=yorld-WSOeU
38 ///
39 /// The slides for the talk are available at
40 ///
41 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42 ///
43 /// The talk provides an overview of how the outliner finds candidates and
44 /// ultimately outlines them. It describes how the main data structure for this
45 /// pass, the suffix tree, is queried and purged for candidates. It also gives
46 /// a simplified suffix tree construction algorithm for suffix trees based off
47 /// of the algorithm actually used here, Ukkonen's algorithm.
48 ///
49 /// For the original RFC for this pass, please see
50 ///
51 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52 ///
53 /// For more information on the suffix tree data structure, please see
54 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55 ///
56 //===----------------------------------------------------------------------===//
58 #include "llvm/ADT/DenseMap.h"
59 #include "llvm/ADT/Statistic.h"
60 #include "llvm/ADT/Twine.h"
65 #include "llvm/CodeGen/Passes.h"
68 #include "llvm/IR/DIBuilder.h"
69 #include "llvm/IR/IRBuilder.h"
70 #include "llvm/IR/Mangler.h"
71 #include "llvm/Support/Allocator.h"
73 #include "llvm/Support/Debug.h"
75 #include <functional>
76 #include <map>
77 #include <sstream>
78 #include <tuple>
79 #include <vector>
80 
81 #define DEBUG_TYPE "machine-outliner"
82 
83 using namespace llvm;
84 using namespace ore;
85 using namespace outliner;
86 
87 STATISTIC(NumOutlined, "Number of candidates outlined");
88 STATISTIC(FunctionsCreated, "Number of functions created");
89 
90 // Set to true if the user wants the outliner to run on linkonceodr linkage
91 // functions. This is false by default because the linker can dedupe linkonceodr
92 // functions. Since the outliner is confined to a single module (modulo LTO),
93 // this is off by default. It should, however, be the default behaviour in
94 // LTO.
96  "enable-linkonceodr-outlining",
97  cl::Hidden,
98  cl::desc("Enable the machine outliner on linkonceodr functions"),
99  cl::init(false));
100 
101 namespace {
102 
103 /// Represents an undefined index in the suffix tree.
104 const unsigned EmptyIdx = -1;
105 
106 /// A node in a suffix tree which represents a substring or suffix.
107 ///
108 /// Each node has either no children or at least two children, with the root
109 /// being a exception in the empty tree.
110 ///
111 /// Children are represented as a map between unsigned integers and nodes. If
112 /// a node N has a child M on unsigned integer k, then the mapping represented
113 /// by N is a proper prefix of the mapping represented by M. Note that this,
114 /// although similar to a trie is somewhat different: each node stores a full
115 /// substring of the full mapping rather than a single character state.
116 ///
117 /// Each internal node contains a pointer to the internal node representing
118 /// the same string, but with the first character chopped off. This is stored
119 /// in \p Link. Each leaf node stores the start index of its respective
120 /// suffix in \p SuffixIdx.
121 struct SuffixTreeNode {
122 
123  /// The children of this node.
124  ///
125  /// A child existing on an unsigned integer implies that from the mapping
126  /// represented by the current node, there is a way to reach another
127  /// mapping by tacking that character on the end of the current string.
129 
130  /// The start index of this node's substring in the main string.
131  unsigned StartIdx = EmptyIdx;
132 
133  /// The end index of this node's substring in the main string.
134  ///
135  /// Every leaf node must have its \p EndIdx incremented at the end of every
136  /// step in the construction algorithm. To avoid having to update O(N)
137  /// nodes individually at the end of every step, the end index is stored
138  /// as a pointer.
139  unsigned *EndIdx = nullptr;
140 
141  /// For leaves, the start index of the suffix represented by this node.
142  ///
143  /// For all other nodes, this is ignored.
144  unsigned SuffixIdx = EmptyIdx;
145 
146  /// For internal nodes, a pointer to the internal node representing
147  /// the same sequence with the first character chopped off.
148  ///
149  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
150  /// Ukkonen's algorithm does to achieve linear-time construction is
151  /// keep track of which node the next insert should be at. This makes each
152  /// insert O(1), and there are a total of O(N) inserts. The suffix link
153  /// helps with inserting children of internal nodes.
154  ///
155  /// Say we add a child to an internal node with associated mapping S. The
156  /// next insertion must be at the node representing S - its first character.
157  /// This is given by the way that we iteratively build the tree in Ukkonen's
158  /// algorithm. The main idea is to look at the suffixes of each prefix in the
159  /// string, starting with the longest suffix of the prefix, and ending with
160  /// the shortest. Therefore, if we keep pointers between such nodes, we can
161  /// move to the next insertion point in O(1) time. If we don't, then we'd
162  /// have to query from the root, which takes O(N) time. This would make the
163  /// construction algorithm O(N^2) rather than O(N).
164  SuffixTreeNode *Link = nullptr;
165 
166  /// The length of the string formed by concatenating the edge labels from the
167  /// root to this node.
168  unsigned ConcatLen = 0;
169 
170  /// Returns true if this node is a leaf.
171  bool isLeaf() const { return SuffixIdx != EmptyIdx; }
172 
173  /// Returns true if this node is the root of its owning \p SuffixTree.
174  bool isRoot() const { return StartIdx == EmptyIdx; }
175 
176  /// Return the number of elements in the substring associated with this node.
177  size_t size() const {
178 
179  // Is it the root? If so, it's the empty string so return 0.
180  if (isRoot())
181  return 0;
182 
183  assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
184 
185  // Size = the number of elements in the string.
186  // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
187  return *EndIdx - StartIdx + 1;
188  }
189 
190  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
191  : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
192 
193  SuffixTreeNode() {}
194 };
195 
196 /// A data structure for fast substring queries.
197 ///
198 /// Suffix trees represent the suffixes of their input strings in their leaves.
199 /// A suffix tree is a type of compressed trie structure where each node
200 /// represents an entire substring rather than a single character. Each leaf
201 /// of the tree is a suffix.
202 ///
203 /// A suffix tree can be seen as a type of state machine where each state is a
204 /// substring of the full string. The tree is structured so that, for a string
205 /// of length N, there are exactly N leaves in the tree. This structure allows
206 /// us to quickly find repeated substrings of the input string.
207 ///
208 /// In this implementation, a "string" is a vector of unsigned integers.
209 /// These integers may result from hashing some data type. A suffix tree can
210 /// contain 1 or many strings, which can then be queried as one large string.
211 ///
212 /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
213 /// suffix tree construction. Ukkonen's algorithm is explained in more detail
214 /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
215 /// paper is available at
216 ///
217 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
218 class SuffixTree {
219 public:
220  /// Each element is an integer representing an instruction in the module.
221  ArrayRef<unsigned> Str;
222 
223  /// A repeated substring in the tree.
224  struct RepeatedSubstring {
225  /// The length of the string.
226  unsigned Length;
227 
228  /// The start indices of each occurrence.
229  std::vector<unsigned> StartIndices;
230  };
231 
232 private:
233  /// Maintains each node in the tree.
235 
236  /// The root of the suffix tree.
237  ///
238  /// The root represents the empty string. It is maintained by the
239  /// \p NodeAllocator like every other node in the tree.
240  SuffixTreeNode *Root = nullptr;
241 
242  /// Maintains the end indices of the internal nodes in the tree.
243  ///
244  /// Each internal node is guaranteed to never have its end index change
245  /// during the construction algorithm; however, leaves must be updated at
246  /// every step. Therefore, we need to store leaf end indices by reference
247  /// to avoid updating O(N) leaves at every step of construction. Thus,
248  /// every internal node must be allocated its own end index.
249  BumpPtrAllocator InternalEndIdxAllocator;
250 
251  /// The end index of each leaf in the tree.
252  unsigned LeafEndIdx = -1;
253 
254  /// Helper struct which keeps track of the next insertion point in
255  /// Ukkonen's algorithm.
256  struct ActiveState {
257  /// The next node to insert at.
258  SuffixTreeNode *Node;
259 
260  /// The index of the first character in the substring currently being added.
261  unsigned Idx = EmptyIdx;
262 
263  /// The length of the substring we have to add at the current step.
264  unsigned Len = 0;
265  };
266 
267  /// The point the next insertion will take place at in the
268  /// construction algorithm.
269  ActiveState Active;
270 
271  /// Allocate a leaf node and add it to the tree.
272  ///
273  /// \param Parent The parent of this node.
274  /// \param StartIdx The start index of this node's associated string.
275  /// \param Edge The label on the edge leaving \p Parent to this node.
276  ///
277  /// \returns A pointer to the allocated leaf node.
278  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
279  unsigned Edge) {
280 
281  assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
282 
283  SuffixTreeNode *N = new (NodeAllocator.Allocate())
284  SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
285  Parent.Children[Edge] = N;
286 
287  return N;
288  }
289 
290  /// Allocate an internal node and add it to the tree.
291  ///
292  /// \param Parent The parent of this node. Only null when allocating the root.
293  /// \param StartIdx The start index of this node's associated string.
294  /// \param EndIdx The end index of this node's associated string.
295  /// \param Edge The label on the edge leaving \p Parent to this node.
296  ///
297  /// \returns A pointer to the allocated internal node.
298  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
299  unsigned EndIdx, unsigned Edge) {
300 
301  assert(StartIdx <= EndIdx && "String can't start after it ends!");
302  assert(!(!Parent && StartIdx != EmptyIdx) &&
303  "Non-root internal nodes must have parents!");
304 
305  unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
306  SuffixTreeNode *N = new (NodeAllocator.Allocate())
307  SuffixTreeNode(StartIdx, E, Root);
308  if (Parent)
309  Parent->Children[Edge] = N;
310 
311  return N;
312  }
313 
314  /// Set the suffix indices of the leaves to the start indices of their
315  /// respective suffixes.
316  ///
317  /// \param[in] CurrNode The node currently being visited.
318  /// \param CurrNodeLen The concatenation of all node sizes from the root to
319  /// this node. Used to produce suffix indices.
320  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
321 
322  bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
323 
324  // Store the concatenation of lengths down from the root.
325  CurrNode.ConcatLen = CurrNodeLen;
326  // Traverse the tree depth-first.
327  for (auto &ChildPair : CurrNode.Children) {
328  assert(ChildPair.second && "Node had a null child!");
329  setSuffixIndices(*ChildPair.second,
330  CurrNodeLen + ChildPair.second->size());
331  }
332 
333  // Is this node a leaf? If it is, give it a suffix index.
334  if (IsLeaf)
335  CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
336  }
337 
338  /// Construct the suffix tree for the prefix of the input ending at
339  /// \p EndIdx.
340  ///
341  /// Used to construct the full suffix tree iteratively. At the end of each
342  /// step, the constructed suffix tree is either a valid suffix tree, or a
343  /// suffix tree with implicit suffixes. At the end of the final step, the
344  /// suffix tree is a valid tree.
345  ///
346  /// \param EndIdx The end index of the current prefix in the main string.
347  /// \param SuffixesToAdd The number of suffixes that must be added
348  /// to complete the suffix tree at the current phase.
349  ///
350  /// \returns The number of suffixes that have not been added at the end of
351  /// this step.
352  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
353  SuffixTreeNode *NeedsLink = nullptr;
354 
355  while (SuffixesToAdd > 0) {
356 
357  // Are we waiting to add anything other than just the last character?
358  if (Active.Len == 0) {
359  // If not, then say the active index is the end index.
360  Active.Idx = EndIdx;
361  }
362 
363  assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
364 
365  // The first character in the current substring we're looking at.
366  unsigned FirstChar = Str[Active.Idx];
367 
368  // Have we inserted anything starting with FirstChar at the current node?
369  if (Active.Node->Children.count(FirstChar) == 0) {
370  // If not, then we can just insert a leaf and move too the next step.
371  insertLeaf(*Active.Node, EndIdx, FirstChar);
372 
373  // The active node is an internal node, and we visited it, so it must
374  // need a link if it doesn't have one.
375  if (NeedsLink) {
376  NeedsLink->Link = Active.Node;
377  NeedsLink = nullptr;
378  }
379  } else {
380  // There's a match with FirstChar, so look for the point in the tree to
381  // insert a new node.
382  SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
383 
384  unsigned SubstringLen = NextNode->size();
385 
386  // Is the current suffix we're trying to insert longer than the size of
387  // the child we want to move to?
388  if (Active.Len >= SubstringLen) {
389  // If yes, then consume the characters we've seen and move to the next
390  // node.
391  Active.Idx += SubstringLen;
392  Active.Len -= SubstringLen;
393  Active.Node = NextNode;
394  continue;
395  }
396 
397  // Otherwise, the suffix we're trying to insert must be contained in the
398  // next node we want to move to.
399  unsigned LastChar = Str[EndIdx];
400 
401  // Is the string we're trying to insert a substring of the next node?
402  if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
403  // If yes, then we're done for this step. Remember our insertion point
404  // and move to the next end index. At this point, we have an implicit
405  // suffix tree.
406  if (NeedsLink && !Active.Node->isRoot()) {
407  NeedsLink->Link = Active.Node;
408  NeedsLink = nullptr;
409  }
410 
411  Active.Len++;
412  break;
413  }
414 
415  // The string we're trying to insert isn't a substring of the next node,
416  // but matches up to a point. Split the node.
417  //
418  // For example, say we ended our search at a node n and we're trying to
419  // insert ABD. Then we'll create a new node s for AB, reduce n to just
420  // representing C, and insert a new leaf node l to represent d. This
421  // allows us to ensure that if n was a leaf, it remains a leaf.
422  //
423  // | ABC ---split---> | AB
424  // n s
425  // C / \ D
426  // n l
427 
428  // The node s from the diagram
429  SuffixTreeNode *SplitNode =
430  insertInternalNode(Active.Node, NextNode->StartIdx,
431  NextNode->StartIdx + Active.Len - 1, FirstChar);
432 
433  // Insert the new node representing the new substring into the tree as
434  // a child of the split node. This is the node l from the diagram.
435  insertLeaf(*SplitNode, EndIdx, LastChar);
436 
437  // Make the old node a child of the split node and update its start
438  // index. This is the node n from the diagram.
439  NextNode->StartIdx += Active.Len;
440  SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
441 
442  // SplitNode is an internal node, update the suffix link.
443  if (NeedsLink)
444  NeedsLink->Link = SplitNode;
445 
446  NeedsLink = SplitNode;
447  }
448 
449  // We've added something new to the tree, so there's one less suffix to
450  // add.
451  SuffixesToAdd--;
452 
453  if (Active.Node->isRoot()) {
454  if (Active.Len > 0) {
455  Active.Len--;
456  Active.Idx = EndIdx - SuffixesToAdd + 1;
457  }
458  } else {
459  // Start the next phase at the next smallest suffix.
460  Active.Node = Active.Node->Link;
461  }
462  }
463 
464  return SuffixesToAdd;
465  }
466 
467 public:
468  /// Construct a suffix tree from a sequence of unsigned integers.
469  ///
470  /// \param Str The string to construct the suffix tree for.
471  SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
472  Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
473  Active.Node = Root;
474 
475  // Keep track of the number of suffixes we have to add of the current
476  // prefix.
477  unsigned SuffixesToAdd = 0;
478  Active.Node = Root;
479 
480  // Construct the suffix tree iteratively on each prefix of the string.
481  // PfxEndIdx is the end index of the current prefix.
482  // End is one past the last element in the string.
483  for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
484  PfxEndIdx++) {
485  SuffixesToAdd++;
486  LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
487  SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
488  }
489 
490  // Set the suffix indices of each leaf.
491  assert(Root && "Root node can't be nullptr!");
492  setSuffixIndices(*Root, 0);
493  }
494 
495 
496  /// Iterator for finding all repeated substrings in the suffix tree.
497  struct RepeatedSubstringIterator {
498  private:
499  /// The current node we're visiting.
500  SuffixTreeNode *N = nullptr;
501 
502  /// The repeated substring associated with this node.
503  RepeatedSubstring RS;
504 
505  /// The nodes left to visit.
506  std::vector<SuffixTreeNode *> ToVisit;
507 
508  /// The minimum length of a repeated substring to find.
509  /// Since we're outlining, we want at least two instructions in the range.
510  /// FIXME: This may not be true for targets like X86 which support many
511  /// instruction lengths.
512  const unsigned MinLength = 2;
513 
514  /// Move the iterator to the next repeated substring.
515  void advance() {
516  // Clear the current state. If we're at the end of the range, then this
517  // is the state we want to be in.
518  RS = RepeatedSubstring();
519  N = nullptr;
520 
521  // Each leaf node represents a repeat of a string.
522  std::vector<SuffixTreeNode *> LeafChildren;
523 
524  // Continue visiting nodes until we find one which repeats more than once.
525  while (!ToVisit.empty()) {
526  SuffixTreeNode *Curr = ToVisit.back();
527  ToVisit.pop_back();
528  LeafChildren.clear();
529 
530  // Keep track of the length of the string associated with the node. If
531  // it's too short, we'll quit.
532  unsigned Length = Curr->ConcatLen;
533 
534  // Iterate over each child, saving internal nodes for visiting, and
535  // leaf nodes in LeafChildren. Internal nodes represent individual
536  // strings, which may repeat.
537  for (auto &ChildPair : Curr->Children) {
538  // Save all of this node's children for processing.
539  if (!ChildPair.second->isLeaf())
540  ToVisit.push_back(ChildPair.second);
541 
542  // It's not an internal node, so it must be a leaf. If we have a
543  // long enough string, then save the leaf children.
544  else if (Length >= MinLength)
545  LeafChildren.push_back(ChildPair.second);
546  }
547 
548  // The root never represents a repeated substring. If we're looking at
549  // that, then skip it.
550  if (Curr->isRoot())
551  continue;
552 
553  // Do we have any repeated substrings?
554  if (LeafChildren.size() >= 2) {
555  // Yes. Update the state to reflect this, and then bail out.
556  N = Curr;
557  RS.Length = Length;
558  for (SuffixTreeNode *Leaf : LeafChildren)
559  RS.StartIndices.push_back(Leaf->SuffixIdx);
560  break;
561  }
562  }
563 
564  // At this point, either NewRS is an empty RepeatedSubstring, or it was
565  // set in the above loop. Similarly, N is either nullptr, or the node
566  // associated with NewRS.
567  }
568 
569  public:
570  /// Return the current repeated substring.
571  RepeatedSubstring &operator*() { return RS; }
572 
573  RepeatedSubstringIterator &operator++() {
574  advance();
575  return *this;
576  }
577 
578  RepeatedSubstringIterator operator++(int I) {
579  RepeatedSubstringIterator It(*this);
580  advance();
581  return It;
582  }
583 
584  bool operator==(const RepeatedSubstringIterator &Other) {
585  return N == Other.N;
586  }
587  bool operator!=(const RepeatedSubstringIterator &Other) {
588  return !(*this == Other);
589  }
590 
591  RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
592  // Do we have a non-null node?
593  if (N) {
594  // Yes. At the first step, we need to visit all of N's children.
595  // Note: This means that we visit N last.
596  ToVisit.push_back(N);
597  advance();
598  }
599  }
600 };
601 
602  typedef RepeatedSubstringIterator iterator;
603  iterator begin() { return iterator(Root); }
604  iterator end() { return iterator(nullptr); }
605 };
606 
607 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
608 struct InstructionMapper {
609 
610  /// The next available integer to assign to a \p MachineInstr that
611  /// cannot be outlined.
612  ///
613  /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
614  unsigned IllegalInstrNumber = -3;
615 
616  /// The next available integer to assign to a \p MachineInstr that can
617  /// be outlined.
618  unsigned LegalInstrNumber = 0;
619 
620  /// Correspondence from \p MachineInstrs to unsigned integers.
622  InstructionIntegerMap;
623 
624  /// Correspondence between \p MachineBasicBlocks and target-defined flags.
626 
627  /// The vector of unsigned integers that the module is mapped to.
628  std::vector<unsigned> UnsignedVec;
629 
630  /// Stores the location of the instruction associated with the integer
631  /// at index i in \p UnsignedVec for each index i.
632  std::vector<MachineBasicBlock::iterator> InstrList;
633 
634  // Set if we added an illegal number in the previous step.
635  // Since each illegal number is unique, we only need one of them between
636  // each range of legal numbers. This lets us make sure we don't add more
637  // than one illegal number per range.
638  bool AddedIllegalLastTime = false;
639 
640  /// Maps \p *It to a legal integer.
641  ///
642  /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
643  /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
644  ///
645  /// \returns The integer that \p *It was mapped to.
646  unsigned mapToLegalUnsigned(
647  MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
648  bool &HaveLegalRange, unsigned &NumLegalInBlock,
649  std::vector<unsigned> &UnsignedVecForMBB,
650  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
651  // We added something legal, so we should unset the AddedLegalLastTime
652  // flag.
653  AddedIllegalLastTime = false;
654 
655  // If we have at least two adjacent legal instructions (which may have
656  // invisible instructions in between), remember that.
657  if (CanOutlineWithPrevInstr)
658  HaveLegalRange = true;
659  CanOutlineWithPrevInstr = true;
660 
661  // Keep track of the number of legal instructions we insert.
662  NumLegalInBlock++;
663 
664  // Get the integer for this instruction or give it the current
665  // LegalInstrNumber.
666  InstrListForMBB.push_back(It);
667  MachineInstr &MI = *It;
668  bool WasInserted;
670  ResultIt;
671  std::tie(ResultIt, WasInserted) =
672  InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
673  unsigned MINumber = ResultIt->second;
674 
675  // There was an insertion.
676  if (WasInserted)
677  LegalInstrNumber++;
678 
679  UnsignedVecForMBB.push_back(MINumber);
680 
681  // Make sure we don't overflow or use any integers reserved by the DenseMap.
682  if (LegalInstrNumber >= IllegalInstrNumber)
683  report_fatal_error("Instruction mapping overflow!");
684 
685  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
686  "Tried to assign DenseMap tombstone or empty key to instruction.");
687  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
688  "Tried to assign DenseMap tombstone or empty key to instruction.");
689 
690  return MINumber;
691  }
692 
693  /// Maps \p *It to an illegal integer.
694  ///
695  /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
696  /// IllegalInstrNumber.
697  ///
698  /// \returns The integer that \p *It was mapped to.
699  unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
700  bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
701  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
702  // Can't outline an illegal instruction. Set the flag.
703  CanOutlineWithPrevInstr = false;
704 
705  // Only add one illegal number per range of legal numbers.
706  if (AddedIllegalLastTime)
707  return IllegalInstrNumber;
708 
709  // Remember that we added an illegal number last time.
710  AddedIllegalLastTime = true;
711  unsigned MINumber = IllegalInstrNumber;
712 
713  InstrListForMBB.push_back(It);
714  UnsignedVecForMBB.push_back(IllegalInstrNumber);
715  IllegalInstrNumber--;
716 
717  assert(LegalInstrNumber < IllegalInstrNumber &&
718  "Instruction mapping overflow!");
719 
720  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
721  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
722 
723  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
724  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
725 
726  return MINumber;
727  }
728 
729  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
730  /// and appends it to \p UnsignedVec and \p InstrList.
731  ///
732  /// Two instructions are assigned the same integer if they are identical.
733  /// If an instruction is deemed unsafe to outline, then it will be assigned an
734  /// unique integer. The resulting mapping is placed into a suffix tree and
735  /// queried for candidates.
736  ///
737  /// \param MBB The \p MachineBasicBlock to be translated into integers.
738  /// \param TII \p TargetInstrInfo for the function.
739  void convertToUnsignedVec(MachineBasicBlock &MBB,
740  const TargetInstrInfo &TII) {
741  unsigned Flags = 0;
742 
743  // Don't even map in this case.
744  if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
745  return;
746 
747  // Store info for the MBB for later outlining.
748  MBBFlagsMap[&MBB] = Flags;
749 
751 
752  // The number of instructions in this block that will be considered for
753  // outlining.
754  unsigned NumLegalInBlock = 0;
755 
756  // True if we have at least two legal instructions which aren't separated
757  // by an illegal instruction.
758  bool HaveLegalRange = false;
759 
760  // True if we can perform outlining given the last mapped (non-invisible)
761  // instruction. This lets us know if we have a legal range.
762  bool CanOutlineWithPrevInstr = false;
763 
764  // FIXME: Should this all just be handled in the target, rather than using
765  // repeated calls to getOutliningType?
766  std::vector<unsigned> UnsignedVecForMBB;
767  std::vector<MachineBasicBlock::iterator> InstrListForMBB;
768 
769  for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
770  // Keep track of where this instruction is in the module.
771  switch (TII.getOutliningType(It, Flags)) {
772  case InstrType::Illegal:
773  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
774  UnsignedVecForMBB, InstrListForMBB);
775  break;
776 
777  case InstrType::Legal:
778  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
779  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
780  break;
781 
783  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
784  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
785  // The instruction also acts as a terminator, so we have to record that
786  // in the string.
787  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
788  InstrListForMBB);
789  break;
790 
792  // Normally this is set by mapTo(Blah)Unsigned, but we just want to
793  // skip this instruction. So, unset the flag here.
794  AddedIllegalLastTime = false;
795  break;
796  }
797  }
798 
799  // Are there enough legal instructions in the block for outlining to be
800  // possible?
801  if (HaveLegalRange) {
802  // After we're done every insertion, uniquely terminate this part of the
803  // "string". This makes sure we won't match across basic block or function
804  // boundaries since the "end" is encoded uniquely and thus appears in no
805  // repeated substring.
806  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
807  InstrListForMBB);
808  InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
809  InstrListForMBB.end());
810  UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
811  UnsignedVecForMBB.end());
812  }
813  }
814 
815  InstructionMapper() {
816  // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
817  // changed.
818  assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
819  "DenseMapInfo<unsigned>'s empty key isn't -1!");
821  "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
822  }
823 };
824 
825 /// An interprocedural pass which finds repeated sequences of
826 /// instructions and replaces them with calls to functions.
827 ///
828 /// Each instruction is mapped to an unsigned integer and placed in a string.
829 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
830 /// is then repeatedly queried for repeated sequences of instructions. Each
831 /// non-overlapping repeated sequence is then placed in its own
832 /// \p MachineFunction and each instance is then replaced with a call to that
833 /// function.
834 struct MachineOutliner : public ModulePass {
835 
836  static char ID;
837 
838  /// Set to true if the outliner should consider functions with
839  /// linkonceodr linkage.
840  bool OutlineFromLinkOnceODRs = false;
841 
842  /// Set to true if the outliner should run on all functions in the module
843  /// considered safe for outlining.
844  /// Set to true by default for compatibility with llc's -run-pass option.
845  /// Set when the pass is constructed in TargetPassConfig.
846  bool RunOnAllFunctions = true;
847 
848  StringRef getPassName() const override { return "Machine Outliner"; }
849 
850  void getAnalysisUsage(AnalysisUsage &AU) const override {
853  AU.setPreservesAll();
855  }
856 
857  MachineOutliner() : ModulePass(ID) {
859  }
860 
861  /// Remark output explaining that not outlining a set of candidates would be
862  /// better than outlining that set.
863  void emitNotOutliningCheaperRemark(
864  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
865  OutlinedFunction &OF);
866 
867  /// Remark output explaining that a function was outlined.
868  void emitOutlinedFunctionRemark(OutlinedFunction &OF);
869 
870  /// Find all repeated substrings that satisfy the outlining cost model by
871  /// constructing a suffix tree.
872  ///
873  /// If a substring appears at least twice, then it must be represented by
874  /// an internal node which appears in at least two suffixes. Each suffix
875  /// is represented by a leaf node. To do this, we visit each internal node
876  /// in the tree, using the leaf children of each internal node. If an
877  /// internal node represents a beneficial substring, then we use each of
878  /// its leaf children to find the locations of its substring.
879  ///
880  /// \param Mapper Contains outlining mapping information.
881  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
882  /// each type of candidate.
883  void findCandidates(InstructionMapper &Mapper,
884  std::vector<OutlinedFunction> &FunctionList);
885 
886  /// Replace the sequences of instructions represented by \p OutlinedFunctions
887  /// with calls to functions.
888  ///
889  /// \param M The module we are outlining from.
890  /// \param FunctionList A list of functions to be inserted into the module.
891  /// \param Mapper Contains the instruction mappings for the module.
892  bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
893  InstructionMapper &Mapper);
894 
895  /// Creates a function for \p OF and inserts it into the module.
896  MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
897  InstructionMapper &Mapper,
898  unsigned Name);
899 
900  /// Construct a suffix tree on the instructions in \p M and outline repeated
901  /// strings from that tree.
902  bool runOnModule(Module &M) override;
903 
904  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
905  /// function for remark emission.
906  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
907  DISubprogram *SP;
908  for (const Candidate &C : OF.Candidates)
909  if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
910  return SP;
911  return nullptr;
912  }
913 
914  /// Populate and \p InstructionMapper with instruction-to-integer mappings.
915  /// These are used to construct a suffix tree.
916  void populateMapper(InstructionMapper &Mapper, Module &M,
917  MachineModuleInfo &MMI);
918 
919  /// Initialize information necessary to output a size remark.
920  /// FIXME: This should be handled by the pass manager, not the outliner.
921  /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
922  /// pass manager.
923  void initSizeRemarkInfo(
924  const Module &M, const MachineModuleInfo &MMI,
925  StringMap<unsigned> &FunctionToInstrCount);
926 
927  /// Emit the remark.
928  // FIXME: This should be handled by the pass manager, not the outliner.
929  void emitInstrCountChangedRemark(
930  const Module &M, const MachineModuleInfo &MMI,
931  const StringMap<unsigned> &FunctionToInstrCount);
932 };
933 } // Anonymous namespace.
934 
935 char MachineOutliner::ID = 0;
936 
937 namespace llvm {
938 ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
939  MachineOutliner *OL = new MachineOutliner();
940  OL->RunOnAllFunctions = RunOnAllFunctions;
941  return OL;
942 }
943 
944 } // namespace llvm
945 
946 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
947  false)
948 
949 void MachineOutliner::emitNotOutliningCheaperRemark(
950  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
951  OutlinedFunction &OF) {
952  // FIXME: Right now, we arbitrarily choose some Candidate from the
953  // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
954  // We should probably sort these by function name or something to make sure
955  // the remarks are stable.
956  Candidate &C = CandidatesForRepeatedSeq.front();
957  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
958  MORE.emit([&]() {
959  MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
960  C.front()->getDebugLoc(), C.getMBB());
961  R << "Did not outline " << NV("Length", StringLen) << " instructions"
962  << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
963  << " locations."
964  << " Bytes from outlining all occurrences ("
965  << NV("OutliningCost", OF.getOutliningCost()) << ")"
966  << " >= Unoutlined instruction bytes ("
967  << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
968  << " (Also found at: ";
969 
970  // Tell the user the other places the candidate was found.
971  for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
972  R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
973  CandidatesForRepeatedSeq[i].front()->getDebugLoc());
974  if (i != e - 1)
975  R << ", ";
976  }
977 
978  R << ")";
979  return R;
980  });
981 }
982 
983 void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
984  MachineBasicBlock *MBB = &*OF.MF->begin();
985  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
986  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
987  MBB->findDebugLoc(MBB->begin()), MBB);
988  R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
989  << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
990  << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
991  << " locations. "
992  << "(Found at: ";
993 
994  // Tell the user the other places the candidate was found.
995  for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
996 
997  R << NV((Twine("StartLoc") + Twine(i)).str(),
998  OF.Candidates[i].front()->getDebugLoc());
999  if (i != e - 1)
1000  R << ", ";
1001  }
1002 
1003  R << ")";
1004 
1005  MORE.emit(R);
1006 }
1007 
1008 void
1009 MachineOutliner::findCandidates(InstructionMapper &Mapper,
1010  std::vector<OutlinedFunction> &FunctionList) {
1011  FunctionList.clear();
1012  SuffixTree ST(Mapper.UnsignedVec);
1013 
1014  // First, find dall of the repeated substrings in the tree of minimum length
1015  // 2.
1016  std::vector<Candidate> CandidatesForRepeatedSeq;
1017  for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
1018  CandidatesForRepeatedSeq.clear();
1019  SuffixTree::RepeatedSubstring RS = *It;
1020  unsigned StringLen = RS.Length;
1021  for (const unsigned &StartIdx : RS.StartIndices) {
1022  unsigned EndIdx = StartIdx + StringLen - 1;
1023  // Trick: Discard some candidates that would be incompatible with the
1024  // ones we've already found for this sequence. This will save us some
1025  // work in candidate selection.
1026  //
1027  // If two candidates overlap, then we can't outline them both. This
1028  // happens when we have candidates that look like, say
1029  //
1030  // AA (where each "A" is an instruction).
1031  //
1032  // We might have some portion of the module that looks like this:
1033  // AAAAAA (6 A's)
1034  //
1035  // In this case, there are 5 different copies of "AA" in this range, but
1036  // at most 3 can be outlined. If only outlining 3 of these is going to
1037  // be unbeneficial, then we ought to not bother.
1038  //
1039  // Note that two things DON'T overlap when they look like this:
1040  // start1...end1 .... start2...end2
1041  // That is, one must either
1042  // * End before the other starts
1043  // * Start after the other ends
1044  if (std::all_of(
1045  CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
1046  [&StartIdx, &EndIdx](const Candidate &C) {
1047  return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
1048  })) {
1049  // It doesn't overlap with anything, so we can outline it.
1050  // Each sequence is over [StartIt, EndIt].
1051  // Save the candidate and its location.
1052 
1053  MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
1054  MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
1055  MachineBasicBlock *MBB = StartIt->getParent();
1056 
1057  CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
1058  EndIt, MBB, FunctionList.size(),
1059  Mapper.MBBFlagsMap[MBB]);
1060  }
1061  }
1062 
1063  // We've found something we might want to outline.
1064  // Create an OutlinedFunction to store it and check if it'd be beneficial
1065  // to outline.
1066  if (CandidatesForRepeatedSeq.size() < 2)
1067  continue;
1068 
1069  // Arbitrarily choose a TII from the first candidate.
1070  // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
1071  const TargetInstrInfo *TII =
1072  CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
1073 
1074  OutlinedFunction OF =
1075  TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
1076 
1077  // If we deleted too many candidates, then there's nothing worth outlining.
1078  // FIXME: This should take target-specified instruction sizes into account.
1079  if (OF.Candidates.size() < 2)
1080  continue;
1081 
1082  // Is it better to outline this candidate than not?
1083  if (OF.getBenefit() < 1) {
1084  emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
1085  continue;
1086  }
1087 
1088  FunctionList.push_back(OF);
1089  }
1090 }
1091 
1093 MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
1094  InstructionMapper &Mapper,
1095  unsigned Name) {
1096 
1097  // Create the function name. This should be unique. For now, just hash the
1098  // module name and include it in the function name plus the number of this
1099  // function.
1100  std::ostringstream NameStream;
1101  // FIXME: We should have a better naming scheme. This should be stable,
1102  // regardless of changes to the outliner's cost model/traversal order.
1103  NameStream << "OUTLINED_FUNCTION_" << Name;
1104 
1105  // Create the function using an IR-level function.
1106  LLVMContext &C = M.getContext();
1107  Function *F =
1109  Function::ExternalLinkage, NameStream.str(), M);
1110 
1111  // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1112  // which gives us better results when we outline from linkonceodr functions.
1115 
1116  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
1117  // necessary.
1118 
1119  // Set optsize/minsize, so we don't insert padding between outlined
1120  // functions.
1121  F->addFnAttr(Attribute::OptimizeForSize);
1122  F->addFnAttr(Attribute::MinSize);
1123 
1124  // Include target features from an arbitrary candidate for the outlined
1125  // function. This makes sure the outlined function knows what kinds of
1126  // instructions are going into it. This is fine, since all parent functions
1127  // must necessarily support the instructions that are in the outlined region.
1128  Candidate &FirstCand = OF.Candidates.front();
1129  const Function &ParentFn = FirstCand.getMF()->getFunction();
1130  if (ParentFn.hasFnAttribute("target-features"))
1131  F->addFnAttr(ParentFn.getFnAttribute("target-features"));
1132 
1133  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1134  IRBuilder<> Builder(EntryBB);
1135  Builder.CreateRetVoid();
1136 
1137  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1139  MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1140  const TargetSubtargetInfo &STI = MF.getSubtarget();
1141  const TargetInstrInfo &TII = *STI.getInstrInfo();
1142 
1143  // Insert the new function into the module.
1144  MF.insert(MF.begin(), &MBB);
1145 
1146  for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
1147  ++I) {
1148  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1149  NewMI->dropMemRefs(MF);
1150 
1151  // Don't keep debug information for outlined instructions.
1152  NewMI->setDebugLoc(DebugLoc());
1153  MBB.insert(MBB.end(), NewMI);
1154  }
1155 
1156  TII.buildOutlinedFrame(MBB, MF, OF);
1157 
1158  // Outlined functions shouldn't preserve liveness.
1159  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
1160  MF.getRegInfo().freezeReservedRegs(MF);
1161 
1162  // If there's a DISubprogram associated with this outlined function, then
1163  // emit debug info for the outlined function.
1164  if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1165  // We have a DISubprogram. Get its DICompileUnit.
1166  DICompileUnit *CU = SP->getUnit();
1167  DIBuilder DB(M, true, CU);
1168  DIFile *Unit = SP->getFile();
1169  Mangler Mg;
1170  // Get the mangled name of the function for the linkage name.
1171  std::string Dummy;
1172  llvm::raw_string_ostream MangledNameStream(Dummy);
1173  Mg.getNameWithPrefix(MangledNameStream, F, false);
1174 
1175  DISubprogram *OutlinedSP = DB.createFunction(
1176  Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
1177  Unit /* File */,
1178  0 /* Line 0 is reserved for compiler-generated code. */,
1179  DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
1180  0, /* Line 0 is reserved for compiler-generated code. */
1181  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1182  /* Outlined code is optimized code by definition. */
1183  DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1184 
1185  // Don't add any new variables to the subprogram.
1186  DB.finalizeSubprogram(OutlinedSP);
1187 
1188  // Attach subprogram to the function.
1189  F->setSubprogram(OutlinedSP);
1190  // We're done with the DIBuilder.
1191  DB.finalize();
1192  }
1193 
1194  return &MF;
1195 }
1196 
1197 bool MachineOutliner::outline(Module &M,
1198  std::vector<OutlinedFunction> &FunctionList,
1199  InstructionMapper &Mapper) {
1200 
1201  bool OutlinedSomething = false;
1202 
1203  // Number to append to the current outlined function.
1204  unsigned OutlinedFunctionNum = 0;
1205 
1206  // Sort by benefit. The most beneficial functions should be outlined first.
1207  std::stable_sort(
1208  FunctionList.begin(), FunctionList.end(),
1209  [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
1210  return LHS.getBenefit() > RHS.getBenefit();
1211  });
1212 
1213  // Walk over each function, outlining them as we go along. Functions are
1214  // outlined greedily, based off the sort above.
1215  for (OutlinedFunction &OF : FunctionList) {
1216  // If we outlined something that overlapped with a candidate in a previous
1217  // step, then we can't outline from it.
1218  erase_if(OF.Candidates, [&Mapper](Candidate &C) {
1219  return std::any_of(
1220  Mapper.UnsignedVec.begin() + C.getStartIdx(),
1221  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1222  [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
1223  });
1224 
1225  // If we made it unbeneficial to outline this function, skip it.
1226  if (OF.getBenefit() < 1)
1227  continue;
1228 
1229  // It's beneficial. Create the function and outline its sequence's
1230  // occurrences.
1231  OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
1232  emitOutlinedFunctionRemark(OF);
1233  FunctionsCreated++;
1234  OutlinedFunctionNum++; // Created a function, move to the next name.
1235  MachineFunction *MF = OF.MF;
1236  const TargetSubtargetInfo &STI = MF->getSubtarget();
1237  const TargetInstrInfo &TII = *STI.getInstrInfo();
1238 
1239  // Replace occurrences of the sequence with calls to the new function.
1240  for (Candidate &C : OF.Candidates) {
1241  MachineBasicBlock &MBB = *C.getMBB();
1242  MachineBasicBlock::iterator StartIt = C.front();
1243  MachineBasicBlock::iterator EndIt = C.back();
1244 
1245  // Insert the call.
1246  auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
1247 
1248  // If the caller tracks liveness, then we need to make sure that
1249  // anything we outline doesn't break liveness assumptions. The outlined
1250  // functions themselves currently don't track liveness, but we should
1251  // make sure that the ranges we yank things out of aren't wrong.
1252  if (MBB.getParent()->getProperties().hasProperty(
1254  // Helper lambda for adding implicit def operands to the call
1255  // instruction.
1256  auto CopyDefs = [&CallInst](MachineInstr &MI) {
1257  for (MachineOperand &MOP : MI.operands()) {
1258  // Skip over anything that isn't a register.
1259  if (!MOP.isReg())
1260  continue;
1261 
1262  // If it's a def, add it to the call instruction.
1263  if (MOP.isDef())
1264  CallInst->addOperand(MachineOperand::CreateReg(
1265  MOP.getReg(), true, /* isDef = true */
1266  true /* isImp = true */));
1267  }
1268  };
1269  // Copy over the defs in the outlined range.
1270  // First inst in outlined range <-- Anything that's defined in this
1271  // ... .. range has to be added as an
1272  // implicit Last inst in outlined range <-- def to the call
1273  // instruction.
1274  std::for_each(CallInst, std::next(EndIt), CopyDefs);
1275  }
1276 
1277  // Erase from the point after where the call was inserted up to, and
1278  // including, the final instruction in the sequence.
1279  // Erase needs one past the end, so we need std::next there too.
1280  MBB.erase(std::next(StartIt), std::next(EndIt));
1281 
1282  // Keep track of what we removed by marking them all as -1.
1283  std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
1284  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1285  [](unsigned &I) { I = static_cast<unsigned>(-1); });
1286  OutlinedSomething = true;
1287 
1288  // Statistics.
1289  NumOutlined++;
1290  }
1291  }
1292 
1293  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1294 
1295  return OutlinedSomething;
1296 }
1297 
1298 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
1299  MachineModuleInfo &MMI) {
1300  // Build instruction mappings for each function in the module. Start by
1301  // iterating over each Function in M.
1302  for (Function &F : M) {
1303 
1304  // If there's nothing in F, then there's no reason to try and outline from
1305  // it.
1306  if (F.empty())
1307  continue;
1308 
1309  // There's something in F. Check if it has a MachineFunction associated with
1310  // it.
1312 
1313  // If it doesn't, then there's nothing to outline from. Move to the next
1314  // Function.
1315  if (!MF)
1316  continue;
1317 
1318  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1319 
1320  if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
1321  continue;
1322 
1323  // We have a MachineFunction. Ask the target if it's suitable for outlining.
1324  // If it isn't, then move on to the next Function in the module.
1325  if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
1326  continue;
1327 
1328  // We have a function suitable for outlining. Iterate over every
1329  // MachineBasicBlock in MF and try to map its instructions to a list of
1330  // unsigned integers.
1331  for (MachineBasicBlock &MBB : *MF) {
1332  // If there isn't anything in MBB, then there's no point in outlining from
1333  // it.
1334  // If there are fewer than 2 instructions in the MBB, then it can't ever
1335  // contain something worth outlining.
1336  // FIXME: This should be based off of the maximum size in B of an outlined
1337  // call versus the size in B of the MBB.
1338  if (MBB.empty() || MBB.size() < 2)
1339  continue;
1340 
1341  // Check if MBB could be the target of an indirect branch. If it is, then
1342  // we don't want to outline from it.
1343  if (MBB.hasAddressTaken())
1344  continue;
1345 
1346  // MBB is suitable for outlining. Map it to a list of unsigneds.
1347  Mapper.convertToUnsignedVec(MBB, *TII);
1348  }
1349  }
1350 }
1351 
1352 void MachineOutliner::initSizeRemarkInfo(
1353  const Module &M, const MachineModuleInfo &MMI,
1354  StringMap<unsigned> &FunctionToInstrCount) {
1355  // Collect instruction counts for every function. We'll use this to emit
1356  // per-function size remarks later.
1357  for (const Function &F : M) {
1359 
1360  // We only care about MI counts here. If there's no MachineFunction at this
1361  // point, then there won't be after the outliner runs, so let's move on.
1362  if (!MF)
1363  continue;
1364  FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1365  }
1366 }
1367 
1368 void MachineOutliner::emitInstrCountChangedRemark(
1369  const Module &M, const MachineModuleInfo &MMI,
1370  const StringMap<unsigned> &FunctionToInstrCount) {
1371  // Iterate over each function in the module and emit remarks.
1372  // Note that we won't miss anything by doing this, because the outliner never
1373  // deletes functions.
1374  for (const Function &F : M) {
1376 
1377  // The outliner never deletes functions. If we don't have a MF here, then we
1378  // didn't have one prior to outlining either.
1379  if (!MF)
1380  continue;
1381 
1382  std::string Fname = F.getName();
1383  unsigned FnCountAfter = MF->getInstructionCount();
1384  unsigned FnCountBefore = 0;
1385 
1386  // Check if the function was recorded before.
1387  auto It = FunctionToInstrCount.find(Fname);
1388 
1389  // Did we have a previously-recorded size? If yes, then set FnCountBefore
1390  // to that.
1391  if (It != FunctionToInstrCount.end())
1392  FnCountBefore = It->second;
1393 
1394  // Compute the delta and emit a remark if there was a change.
1395  int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1396  static_cast<int64_t>(FnCountBefore);
1397  if (FnDelta == 0)
1398  continue;
1399 
1400  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1401  MORE.emit([&]() {
1402  MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1404  &MF->front());
1405  R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1406  << ": Function: "
1407  << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1408  << ": MI instruction count changed from "
1409  << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1410  FnCountBefore)
1411  << " to "
1412  << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1413  FnCountAfter)
1414  << "; Delta: "
1415  << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1416  return R;
1417  });
1418  }
1419 }
1420 
1421 bool MachineOutliner::runOnModule(Module &M) {
1422  // Check if there's anything in the module. If it's empty, then there's
1423  // nothing to outline.
1424  if (M.empty())
1425  return false;
1426 
1427  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1428 
1429  // If the user passed -enable-machine-outliner=always or
1430  // -enable-machine-outliner, the pass will run on all functions in the module.
1431  // Otherwise, if the target supports default outlining, it will run on all
1432  // functions deemed by the target to be worth outlining from by default. Tell
1433  // the user how the outliner is running.
1434  LLVM_DEBUG(
1435  dbgs() << "Machine Outliner: Running on ";
1436  if (RunOnAllFunctions)
1437  dbgs() << "all functions";
1438  else
1439  dbgs() << "target-default functions";
1440  dbgs() << "\n"
1441  );
1442 
1443  // If the user specifies that they want to outline from linkonceodrs, set
1444  // it here.
1445  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1446  InstructionMapper Mapper;
1447 
1448  // Prepare instruction mappings for the suffix tree.
1449  populateMapper(Mapper, M, MMI);
1450  std::vector<OutlinedFunction> FunctionList;
1451 
1452  // Find all of the outlining candidates.
1453  findCandidates(Mapper, FunctionList);
1454 
1455  // If we've requested size remarks, then collect the MI counts of every
1456  // function before outlining, and the MI counts after outlining.
1457  // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1458  // the pass manager's responsibility.
1459  // This could pretty easily be placed in outline instead, but because we
1460  // really ultimately *don't* want this here, it's done like this for now
1461  // instead.
1462 
1463  // Check if we want size remarks.
1464  bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1465  StringMap<unsigned> FunctionToInstrCount;
1466  if (ShouldEmitSizeRemarks)
1467  initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
1468 
1469  // Outline each of the candidates and return true if something was outlined.
1470  bool OutlinedSomething = outline(M, FunctionList, Mapper);
1471 
1472  // If we outlined something, we definitely changed the MI count of the
1473  // module. If we've asked for size remarks, then output them.
1474  // FIXME: This should be in the pass manager.
1475  if (ShouldEmitSizeRemarks && OutlinedSomething)
1476  emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
1477 
1478  return OutlinedSomething;
1479 }
void finalize()
Construct any deferred debug info descriptors.
Definition: DIBuilder.cpp:68
const Function & getFunction() const
Definition: Function.h:133
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
unsigned getInstructionCount() const
Return the number of MachineInstrs in this MachineFunction.
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:258
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:249
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
const MachineFunctionProperties & getProperties() const
Get the function properties.
DIFile * getFile() const
Diagnostic information for applied optimization remarks.
This class represents a function call, abstracting a target machine&#39;s calling convention.
iterator find(StringRef Key)
Definition: StringMap.h:332
Externally visible function.
Definition: GlobalValue.h:48
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
F(f)
DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine, DINode::DIFlags Flags=DINode::FlagZero, DISubprogram::DISPFlags SPFlags=DISubprogram::SPFlagZero, DITemplateParameterArray TParams=nullptr, DISubprogram *Decl=nullptr, DITypeArray ThrownTypes=nullptr)
Create a new descriptor for the specified subprogram.
Definition: DIBuilder.cpp:751
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
AnalysisUsage & addRequired()
Definition: BitVector.h:937
virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const
Insert a call to an outlined function into the program.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
const HexagonInstrInfo * TII
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:243
DISubroutineType * createSubroutineType(DITypeRefArray ParameterTypes, DINode::DIFlags Flags=DINode::FlagZero, unsigned CC=0)
Create subroutine type.
Definition: DIBuilder.cpp:497
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2090
Diagnostic information for optimization analysis remarks.
Subprogram description.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:91
bool empty() const
Definition: Module.h:606
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr&#39;s memory reference descriptor list.
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const
Return true if the function can safely be outlined from.
TargetInstrInfo - Interface to description of machine instruction set.
DITypeRefArray getOrCreateTypeArray(ArrayRef< Metadata *> Elements)
Get a DITypeRefArray, create one if required.
Definition: DIBuilder.cpp:611
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const
Return true if the function should be outlined from by default.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
void setSubprogram(DISubprogram *SP)
Set the attached subprogram.
Definition: Metadata.cpp:1503
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:135
bool isLeaf(ID id)
Returns true if the intrinsic is a leaf, i.e.
Definition: Function.cpp:1001
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
bool shouldEmitInstrCountChangedRemark()
Return true if size-info optimization remark is enabled, false otherwise.
Definition: Module.h:262
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const
Returns how or if MI should be outlined.
Contains all data structures shared between the outliner implemented in MachineOutliner.cpp and target implementations of the outliner.
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const
Optional target hook that returns true if MBB is safe to outline from, and returns any target-specifi...
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:296
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
Definition: Allocator.h:490
Used in the streaming interface as the general argument type.
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
const MachineBasicBlock & front() const
MachineFunction & getOrCreateMachineFunction(const Function &F)
Returns the MachineFunction constructed for the IR function F.
ModulePass * createMachineOutlinerPass(bool RunOnAllFunctions=true)
This pass performs outlining on machine instructions directly before printing assembly.
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:498
void initializeMachineOutlinerPass(PassRegistry &)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const
Insert a custom frame for outlined functions.
The optimization diagnostic interface.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr...
MachineOperand class - Representation of each machine instruction operand.
#define MORE()
Definition: regcomp.c:251
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:441
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:444
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1329
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
void setPreservesAll()
Set by analyses that do not transform their input at all.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1968
Representation of each machine instruction.
Definition: MachineInstr.h:63
ReturnInst * CreateRetVoid()
Create a &#39;ret void&#39; instruction.
Definition: IRBuilder.h:823
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:215
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
void finalizeSubprogram(DISubprogram *SP)
Finalize a specific subprogram - no new variables may be added to this subprogram afterwards...
Definition: DIBuilder.cpp:48
Diagnostic information for missed-optimization remarks.
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasProperty(Property P) const
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:330
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable&#39;s name.
Definition: Mangler.cpp:111
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:229
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1966
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1178
static cl::opt< bool > EnableLinkOnceODROutlining("enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false))
#define LLVM_DEBUG(X)
Definition: Debug.h:122
virtual outliner::OutlinedFunction getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const
Returns a outliner::OutlinedFunction struct containing target-specific information for a set of outli...
#define DEBUG_TYPE
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
iterator end()
Definition: StringMap.h:317
This class contains meta information specific to a module.