LLVM  10.0.0svn
MachineOutliner.cpp
Go to the documentation of this file.
1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Replaces repeated sequences of instructions with function calls.
11 ///
12 /// This works by placing every instruction from every basic block in a
13 /// suffix tree, and repeatedly querying that tree for repeated sequences of
14 /// instructions. If a sequence of instructions appears often, then it ought
15 /// to be beneficial to pull out into a function.
16 ///
17 /// The MachineOutliner communicates with a given target using hooks defined in
18 /// TargetInstrInfo.h. The target supplies the outliner with information on how
19 /// a specific sequence of instructions should be outlined. This information
20 /// is used to deduce the number of instructions necessary to
21 ///
22 /// * Create an outlined function
23 /// * Call that outlined function
24 ///
25 /// Targets must implement
26 /// * getOutliningCandidateInfo
27 /// * buildOutlinedFrame
28 /// * insertOutlinedCall
29 /// * isFunctionSafeToOutlineFrom
30 ///
31 /// in order to make use of the MachineOutliner.
32 ///
33 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
34 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
35 /// how this pass works, the talk is available on YouTube at
36 ///
37 /// https://www.youtube.com/watch?v=yorld-WSOeU
38 ///
39 /// The slides for the talk are available at
40 ///
41 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42 ///
43 /// The talk provides an overview of how the outliner finds candidates and
44 /// ultimately outlines them. It describes how the main data structure for this
45 /// pass, the suffix tree, is queried and purged for candidates. It also gives
46 /// a simplified suffix tree construction algorithm for suffix trees based off
47 /// of the algorithm actually used here, Ukkonen's algorithm.
48 ///
49 /// For the original RFC for this pass, please see
50 ///
51 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52 ///
53 /// For more information on the suffix tree data structure, please see
54 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55 ///
56 //===----------------------------------------------------------------------===//
58 #include "llvm/ADT/DenseMap.h"
59 #include "llvm/ADT/Statistic.h"
60 #include "llvm/ADT/Twine.h"
65 #include "llvm/CodeGen/Passes.h"
68 #include "llvm/IR/DIBuilder.h"
69 #include "llvm/IR/IRBuilder.h"
70 #include "llvm/IR/Mangler.h"
71 #include "llvm/Support/Allocator.h"
73 #include "llvm/Support/Debug.h"
75 #include <functional>
76 #include <tuple>
77 #include <vector>
78 
79 #define DEBUG_TYPE "machine-outliner"
80 
81 using namespace llvm;
82 using namespace ore;
83 using namespace outliner;
84 
85 STATISTIC(NumOutlined, "Number of candidates outlined");
86 STATISTIC(FunctionsCreated, "Number of functions created");
87 
88 // Set to true if the user wants the outliner to run on linkonceodr linkage
89 // functions. This is false by default because the linker can dedupe linkonceodr
90 // functions. Since the outliner is confined to a single module (modulo LTO),
91 // this is off by default. It should, however, be the default behaviour in
92 // LTO.
94  "enable-linkonceodr-outlining",
95  cl::Hidden,
96  cl::desc("Enable the machine outliner on linkonceodr functions"),
97  cl::init(false));
98 
99 namespace {
100 
101 /// Represents an undefined index in the suffix tree.
102 const unsigned EmptyIdx = -1;
103 
104 /// A node in a suffix tree which represents a substring or suffix.
105 ///
106 /// Each node has either no children or at least two children, with the root
107 /// being a exception in the empty tree.
108 ///
109 /// Children are represented as a map between unsigned integers and nodes. If
110 /// a node N has a child M on unsigned integer k, then the mapping represented
111 /// by N is a proper prefix of the mapping represented by M. Note that this,
112 /// although similar to a trie is somewhat different: each node stores a full
113 /// substring of the full mapping rather than a single character state.
114 ///
115 /// Each internal node contains a pointer to the internal node representing
116 /// the same string, but with the first character chopped off. This is stored
117 /// in \p Link. Each leaf node stores the start index of its respective
118 /// suffix in \p SuffixIdx.
119 struct SuffixTreeNode {
120 
121  /// The children of this node.
122  ///
123  /// A child existing on an unsigned integer implies that from the mapping
124  /// represented by the current node, there is a way to reach another
125  /// mapping by tacking that character on the end of the current string.
127 
128  /// The start index of this node's substring in the main string.
129  unsigned StartIdx = EmptyIdx;
130 
131  /// The end index of this node's substring in the main string.
132  ///
133  /// Every leaf node must have its \p EndIdx incremented at the end of every
134  /// step in the construction algorithm. To avoid having to update O(N)
135  /// nodes individually at the end of every step, the end index is stored
136  /// as a pointer.
137  unsigned *EndIdx = nullptr;
138 
139  /// For leaves, the start index of the suffix represented by this node.
140  ///
141  /// For all other nodes, this is ignored.
142  unsigned SuffixIdx = EmptyIdx;
143 
144  /// For internal nodes, a pointer to the internal node representing
145  /// the same sequence with the first character chopped off.
146  ///
147  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
148  /// Ukkonen's algorithm does to achieve linear-time construction is
149  /// keep track of which node the next insert should be at. This makes each
150  /// insert O(1), and there are a total of O(N) inserts. The suffix link
151  /// helps with inserting children of internal nodes.
152  ///
153  /// Say we add a child to an internal node with associated mapping S. The
154  /// next insertion must be at the node representing S - its first character.
155  /// This is given by the way that we iteratively build the tree in Ukkonen's
156  /// algorithm. The main idea is to look at the suffixes of each prefix in the
157  /// string, starting with the longest suffix of the prefix, and ending with
158  /// the shortest. Therefore, if we keep pointers between such nodes, we can
159  /// move to the next insertion point in O(1) time. If we don't, then we'd
160  /// have to query from the root, which takes O(N) time. This would make the
161  /// construction algorithm O(N^2) rather than O(N).
162  SuffixTreeNode *Link = nullptr;
163 
164  /// The length of the string formed by concatenating the edge labels from the
165  /// root to this node.
166  unsigned ConcatLen = 0;
167 
168  /// Returns true if this node is a leaf.
169  bool isLeaf() const { return SuffixIdx != EmptyIdx; }
170 
171  /// Returns true if this node is the root of its owning \p SuffixTree.
172  bool isRoot() const { return StartIdx == EmptyIdx; }
173 
174  /// Return the number of elements in the substring associated with this node.
175  size_t size() const {
176 
177  // Is it the root? If so, it's the empty string so return 0.
178  if (isRoot())
179  return 0;
180 
181  assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
182 
183  // Size = the number of elements in the string.
184  // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
185  return *EndIdx - StartIdx + 1;
186  }
187 
188  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
189  : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
190 
191  SuffixTreeNode() {}
192 };
193 
194 /// A data structure for fast substring queries.
195 ///
196 /// Suffix trees represent the suffixes of their input strings in their leaves.
197 /// A suffix tree is a type of compressed trie structure where each node
198 /// represents an entire substring rather than a single character. Each leaf
199 /// of the tree is a suffix.
200 ///
201 /// A suffix tree can be seen as a type of state machine where each state is a
202 /// substring of the full string. The tree is structured so that, for a string
203 /// of length N, there are exactly N leaves in the tree. This structure allows
204 /// us to quickly find repeated substrings of the input string.
205 ///
206 /// In this implementation, a "string" is a vector of unsigned integers.
207 /// These integers may result from hashing some data type. A suffix tree can
208 /// contain 1 or many strings, which can then be queried as one large string.
209 ///
210 /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
211 /// suffix tree construction. Ukkonen's algorithm is explained in more detail
212 /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
213 /// paper is available at
214 ///
215 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
216 class SuffixTree {
217 public:
218  /// Each element is an integer representing an instruction in the module.
219  ArrayRef<unsigned> Str;
220 
221  /// A repeated substring in the tree.
222  struct RepeatedSubstring {
223  /// The length of the string.
224  unsigned Length;
225 
226  /// The start indices of each occurrence.
227  std::vector<unsigned> StartIndices;
228  };
229 
230 private:
231  /// Maintains each node in the tree.
233 
234  /// The root of the suffix tree.
235  ///
236  /// The root represents the empty string. It is maintained by the
237  /// \p NodeAllocator like every other node in the tree.
238  SuffixTreeNode *Root = nullptr;
239 
240  /// Maintains the end indices of the internal nodes in the tree.
241  ///
242  /// Each internal node is guaranteed to never have its end index change
243  /// during the construction algorithm; however, leaves must be updated at
244  /// every step. Therefore, we need to store leaf end indices by reference
245  /// to avoid updating O(N) leaves at every step of construction. Thus,
246  /// every internal node must be allocated its own end index.
247  BumpPtrAllocator InternalEndIdxAllocator;
248 
249  /// The end index of each leaf in the tree.
250  unsigned LeafEndIdx = -1;
251 
252  /// Helper struct which keeps track of the next insertion point in
253  /// Ukkonen's algorithm.
254  struct ActiveState {
255  /// The next node to insert at.
256  SuffixTreeNode *Node;
257 
258  /// The index of the first character in the substring currently being added.
259  unsigned Idx = EmptyIdx;
260 
261  /// The length of the substring we have to add at the current step.
262  unsigned Len = 0;
263  };
264 
265  /// The point the next insertion will take place at in the
266  /// construction algorithm.
267  ActiveState Active;
268 
269  /// Allocate a leaf node and add it to the tree.
270  ///
271  /// \param Parent The parent of this node.
272  /// \param StartIdx The start index of this node's associated string.
273  /// \param Edge The label on the edge leaving \p Parent to this node.
274  ///
275  /// \returns A pointer to the allocated leaf node.
276  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
277  unsigned Edge) {
278 
279  assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
280 
281  SuffixTreeNode *N = new (NodeAllocator.Allocate())
282  SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
283  Parent.Children[Edge] = N;
284 
285  return N;
286  }
287 
288  /// Allocate an internal node and add it to the tree.
289  ///
290  /// \param Parent The parent of this node. Only null when allocating the root.
291  /// \param StartIdx The start index of this node's associated string.
292  /// \param EndIdx The end index of this node's associated string.
293  /// \param Edge The label on the edge leaving \p Parent to this node.
294  ///
295  /// \returns A pointer to the allocated internal node.
296  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
297  unsigned EndIdx, unsigned Edge) {
298 
299  assert(StartIdx <= EndIdx && "String can't start after it ends!");
300  assert(!(!Parent && StartIdx != EmptyIdx) &&
301  "Non-root internal nodes must have parents!");
302 
303  unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
304  SuffixTreeNode *N = new (NodeAllocator.Allocate())
305  SuffixTreeNode(StartIdx, E, Root);
306  if (Parent)
307  Parent->Children[Edge] = N;
308 
309  return N;
310  }
311 
312  /// Set the suffix indices of the leaves to the start indices of their
313  /// respective suffixes.
314  ///
315  /// \param[in] CurrNode The node currently being visited.
316  /// \param CurrNodeLen The concatenation of all node sizes from the root to
317  /// this node. Used to produce suffix indices.
318  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
319 
320  bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
321 
322  // Store the concatenation of lengths down from the root.
323  CurrNode.ConcatLen = CurrNodeLen;
324  // Traverse the tree depth-first.
325  for (auto &ChildPair : CurrNode.Children) {
326  assert(ChildPair.second && "Node had a null child!");
327  setSuffixIndices(*ChildPair.second,
328  CurrNodeLen + ChildPair.second->size());
329  }
330 
331  // Is this node a leaf? If it is, give it a suffix index.
332  if (IsLeaf)
333  CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
334  }
335 
336  /// Construct the suffix tree for the prefix of the input ending at
337  /// \p EndIdx.
338  ///
339  /// Used to construct the full suffix tree iteratively. At the end of each
340  /// step, the constructed suffix tree is either a valid suffix tree, or a
341  /// suffix tree with implicit suffixes. At the end of the final step, the
342  /// suffix tree is a valid tree.
343  ///
344  /// \param EndIdx The end index of the current prefix in the main string.
345  /// \param SuffixesToAdd The number of suffixes that must be added
346  /// to complete the suffix tree at the current phase.
347  ///
348  /// \returns The number of suffixes that have not been added at the end of
349  /// this step.
350  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
351  SuffixTreeNode *NeedsLink = nullptr;
352 
353  while (SuffixesToAdd > 0) {
354 
355  // Are we waiting to add anything other than just the last character?
356  if (Active.Len == 0) {
357  // If not, then say the active index is the end index.
358  Active.Idx = EndIdx;
359  }
360 
361  assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
362 
363  // The first character in the current substring we're looking at.
364  unsigned FirstChar = Str[Active.Idx];
365 
366  // Have we inserted anything starting with FirstChar at the current node?
367  if (Active.Node->Children.count(FirstChar) == 0) {
368  // If not, then we can just insert a leaf and move too the next step.
369  insertLeaf(*Active.Node, EndIdx, FirstChar);
370 
371  // The active node is an internal node, and we visited it, so it must
372  // need a link if it doesn't have one.
373  if (NeedsLink) {
374  NeedsLink->Link = Active.Node;
375  NeedsLink = nullptr;
376  }
377  } else {
378  // There's a match with FirstChar, so look for the point in the tree to
379  // insert a new node.
380  SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
381 
382  unsigned SubstringLen = NextNode->size();
383 
384  // Is the current suffix we're trying to insert longer than the size of
385  // the child we want to move to?
386  if (Active.Len >= SubstringLen) {
387  // If yes, then consume the characters we've seen and move to the next
388  // node.
389  Active.Idx += SubstringLen;
390  Active.Len -= SubstringLen;
391  Active.Node = NextNode;
392  continue;
393  }
394 
395  // Otherwise, the suffix we're trying to insert must be contained in the
396  // next node we want to move to.
397  unsigned LastChar = Str[EndIdx];
398 
399  // Is the string we're trying to insert a substring of the next node?
400  if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
401  // If yes, then we're done for this step. Remember our insertion point
402  // and move to the next end index. At this point, we have an implicit
403  // suffix tree.
404  if (NeedsLink && !Active.Node->isRoot()) {
405  NeedsLink->Link = Active.Node;
406  NeedsLink = nullptr;
407  }
408 
409  Active.Len++;
410  break;
411  }
412 
413  // The string we're trying to insert isn't a substring of the next node,
414  // but matches up to a point. Split the node.
415  //
416  // For example, say we ended our search at a node n and we're trying to
417  // insert ABD. Then we'll create a new node s for AB, reduce n to just
418  // representing C, and insert a new leaf node l to represent d. This
419  // allows us to ensure that if n was a leaf, it remains a leaf.
420  //
421  // | ABC ---split---> | AB
422  // n s
423  // C / \ D
424  // n l
425 
426  // The node s from the diagram
427  SuffixTreeNode *SplitNode =
428  insertInternalNode(Active.Node, NextNode->StartIdx,
429  NextNode->StartIdx + Active.Len - 1, FirstChar);
430 
431  // Insert the new node representing the new substring into the tree as
432  // a child of the split node. This is the node l from the diagram.
433  insertLeaf(*SplitNode, EndIdx, LastChar);
434 
435  // Make the old node a child of the split node and update its start
436  // index. This is the node n from the diagram.
437  NextNode->StartIdx += Active.Len;
438  SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
439 
440  // SplitNode is an internal node, update the suffix link.
441  if (NeedsLink)
442  NeedsLink->Link = SplitNode;
443 
444  NeedsLink = SplitNode;
445  }
446 
447  // We've added something new to the tree, so there's one less suffix to
448  // add.
449  SuffixesToAdd--;
450 
451  if (Active.Node->isRoot()) {
452  if (Active.Len > 0) {
453  Active.Len--;
454  Active.Idx = EndIdx - SuffixesToAdd + 1;
455  }
456  } else {
457  // Start the next phase at the next smallest suffix.
458  Active.Node = Active.Node->Link;
459  }
460  }
461 
462  return SuffixesToAdd;
463  }
464 
465 public:
466  /// Construct a suffix tree from a sequence of unsigned integers.
467  ///
468  /// \param Str The string to construct the suffix tree for.
469  SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
470  Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
471  Active.Node = Root;
472 
473  // Keep track of the number of suffixes we have to add of the current
474  // prefix.
475  unsigned SuffixesToAdd = 0;
476  Active.Node = Root;
477 
478  // Construct the suffix tree iteratively on each prefix of the string.
479  // PfxEndIdx is the end index of the current prefix.
480  // End is one past the last element in the string.
481  for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
482  PfxEndIdx++) {
483  SuffixesToAdd++;
484  LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
485  SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
486  }
487 
488  // Set the suffix indices of each leaf.
489  assert(Root && "Root node can't be nullptr!");
490  setSuffixIndices(*Root, 0);
491  }
492 
493 
494  /// Iterator for finding all repeated substrings in the suffix tree.
495  struct RepeatedSubstringIterator {
496  private:
497  /// The current node we're visiting.
498  SuffixTreeNode *N = nullptr;
499 
500  /// The repeated substring associated with this node.
501  RepeatedSubstring RS;
502 
503  /// The nodes left to visit.
504  std::vector<SuffixTreeNode *> ToVisit;
505 
506  /// The minimum length of a repeated substring to find.
507  /// Since we're outlining, we want at least two instructions in the range.
508  /// FIXME: This may not be true for targets like X86 which support many
509  /// instruction lengths.
510  const unsigned MinLength = 2;
511 
512  /// Move the iterator to the next repeated substring.
513  void advance() {
514  // Clear the current state. If we're at the end of the range, then this
515  // is the state we want to be in.
516  RS = RepeatedSubstring();
517  N = nullptr;
518 
519  // Each leaf node represents a repeat of a string.
520  std::vector<SuffixTreeNode *> LeafChildren;
521 
522  // Continue visiting nodes until we find one which repeats more than once.
523  while (!ToVisit.empty()) {
524  SuffixTreeNode *Curr = ToVisit.back();
525  ToVisit.pop_back();
526  LeafChildren.clear();
527 
528  // Keep track of the length of the string associated with the node. If
529  // it's too short, we'll quit.
530  unsigned Length = Curr->ConcatLen;
531 
532  // Iterate over each child, saving internal nodes for visiting, and
533  // leaf nodes in LeafChildren. Internal nodes represent individual
534  // strings, which may repeat.
535  for (auto &ChildPair : Curr->Children) {
536  // Save all of this node's children for processing.
537  if (!ChildPair.second->isLeaf())
538  ToVisit.push_back(ChildPair.second);
539 
540  // It's not an internal node, so it must be a leaf. If we have a
541  // long enough string, then save the leaf children.
542  else if (Length >= MinLength)
543  LeafChildren.push_back(ChildPair.second);
544  }
545 
546  // The root never represents a repeated substring. If we're looking at
547  // that, then skip it.
548  if (Curr->isRoot())
549  continue;
550 
551  // Do we have any repeated substrings?
552  if (LeafChildren.size() >= 2) {
553  // Yes. Update the state to reflect this, and then bail out.
554  N = Curr;
555  RS.Length = Length;
556  for (SuffixTreeNode *Leaf : LeafChildren)
557  RS.StartIndices.push_back(Leaf->SuffixIdx);
558  break;
559  }
560  }
561 
562  // At this point, either NewRS is an empty RepeatedSubstring, or it was
563  // set in the above loop. Similarly, N is either nullptr, or the node
564  // associated with NewRS.
565  }
566 
567  public:
568  /// Return the current repeated substring.
569  RepeatedSubstring &operator*() { return RS; }
570 
571  RepeatedSubstringIterator &operator++() {
572  advance();
573  return *this;
574  }
575 
576  RepeatedSubstringIterator operator++(int I) {
577  RepeatedSubstringIterator It(*this);
578  advance();
579  return It;
580  }
581 
582  bool operator==(const RepeatedSubstringIterator &Other) {
583  return N == Other.N;
584  }
585  bool operator!=(const RepeatedSubstringIterator &Other) {
586  return !(*this == Other);
587  }
588 
589  RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
590  // Do we have a non-null node?
591  if (N) {
592  // Yes. At the first step, we need to visit all of N's children.
593  // Note: This means that we visit N last.
594  ToVisit.push_back(N);
595  advance();
596  }
597  }
598 };
599 
600  typedef RepeatedSubstringIterator iterator;
601  iterator begin() { return iterator(Root); }
602  iterator end() { return iterator(nullptr); }
603 };
604 
605 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
606 struct InstructionMapper {
607 
608  /// The next available integer to assign to a \p MachineInstr that
609  /// cannot be outlined.
610  ///
611  /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
612  unsigned IllegalInstrNumber = -3;
613 
614  /// The next available integer to assign to a \p MachineInstr that can
615  /// be outlined.
616  unsigned LegalInstrNumber = 0;
617 
618  /// Correspondence from \p MachineInstrs to unsigned integers.
620  InstructionIntegerMap;
621 
622  /// Correspondence between \p MachineBasicBlocks and target-defined flags.
624 
625  /// The vector of unsigned integers that the module is mapped to.
626  std::vector<unsigned> UnsignedVec;
627 
628  /// Stores the location of the instruction associated with the integer
629  /// at index i in \p UnsignedVec for each index i.
630  std::vector<MachineBasicBlock::iterator> InstrList;
631 
632  // Set if we added an illegal number in the previous step.
633  // Since each illegal number is unique, we only need one of them between
634  // each range of legal numbers. This lets us make sure we don't add more
635  // than one illegal number per range.
636  bool AddedIllegalLastTime = false;
637 
638  /// Maps \p *It to a legal integer.
639  ///
640  /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
641  /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
642  ///
643  /// \returns The integer that \p *It was mapped to.
644  unsigned mapToLegalUnsigned(
645  MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
646  bool &HaveLegalRange, unsigned &NumLegalInBlock,
647  std::vector<unsigned> &UnsignedVecForMBB,
648  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
649  // We added something legal, so we should unset the AddedLegalLastTime
650  // flag.
651  AddedIllegalLastTime = false;
652 
653  // If we have at least two adjacent legal instructions (which may have
654  // invisible instructions in between), remember that.
655  if (CanOutlineWithPrevInstr)
656  HaveLegalRange = true;
657  CanOutlineWithPrevInstr = true;
658 
659  // Keep track of the number of legal instructions we insert.
660  NumLegalInBlock++;
661 
662  // Get the integer for this instruction or give it the current
663  // LegalInstrNumber.
664  InstrListForMBB.push_back(It);
665  MachineInstr &MI = *It;
666  bool WasInserted;
668  ResultIt;
669  std::tie(ResultIt, WasInserted) =
670  InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
671  unsigned MINumber = ResultIt->second;
672 
673  // There was an insertion.
674  if (WasInserted)
675  LegalInstrNumber++;
676 
677  UnsignedVecForMBB.push_back(MINumber);
678 
679  // Make sure we don't overflow or use any integers reserved by the DenseMap.
680  if (LegalInstrNumber >= IllegalInstrNumber)
681  report_fatal_error("Instruction mapping overflow!");
682 
683  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
684  "Tried to assign DenseMap tombstone or empty key to instruction.");
685  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
686  "Tried to assign DenseMap tombstone or empty key to instruction.");
687 
688  return MINumber;
689  }
690 
691  /// Maps \p *It to an illegal integer.
692  ///
693  /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
694  /// IllegalInstrNumber.
695  ///
696  /// \returns The integer that \p *It was mapped to.
697  unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
698  bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
699  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
700  // Can't outline an illegal instruction. Set the flag.
701  CanOutlineWithPrevInstr = false;
702 
703  // Only add one illegal number per range of legal numbers.
704  if (AddedIllegalLastTime)
705  return IllegalInstrNumber;
706 
707  // Remember that we added an illegal number last time.
708  AddedIllegalLastTime = true;
709  unsigned MINumber = IllegalInstrNumber;
710 
711  InstrListForMBB.push_back(It);
712  UnsignedVecForMBB.push_back(IllegalInstrNumber);
713  IllegalInstrNumber--;
714 
715  assert(LegalInstrNumber < IllegalInstrNumber &&
716  "Instruction mapping overflow!");
717 
718  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
719  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
720 
721  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
722  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
723 
724  return MINumber;
725  }
726 
727  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
728  /// and appends it to \p UnsignedVec and \p InstrList.
729  ///
730  /// Two instructions are assigned the same integer if they are identical.
731  /// If an instruction is deemed unsafe to outline, then it will be assigned an
732  /// unique integer. The resulting mapping is placed into a suffix tree and
733  /// queried for candidates.
734  ///
735  /// \param MBB The \p MachineBasicBlock to be translated into integers.
736  /// \param TII \p TargetInstrInfo for the function.
737  void convertToUnsignedVec(MachineBasicBlock &MBB,
738  const TargetInstrInfo &TII) {
739  unsigned Flags = 0;
740 
741  // Don't even map in this case.
742  if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
743  return;
744 
745  // Store info for the MBB for later outlining.
746  MBBFlagsMap[&MBB] = Flags;
747 
749 
750  // The number of instructions in this block that will be considered for
751  // outlining.
752  unsigned NumLegalInBlock = 0;
753 
754  // True if we have at least two legal instructions which aren't separated
755  // by an illegal instruction.
756  bool HaveLegalRange = false;
757 
758  // True if we can perform outlining given the last mapped (non-invisible)
759  // instruction. This lets us know if we have a legal range.
760  bool CanOutlineWithPrevInstr = false;
761 
762  // FIXME: Should this all just be handled in the target, rather than using
763  // repeated calls to getOutliningType?
764  std::vector<unsigned> UnsignedVecForMBB;
765  std::vector<MachineBasicBlock::iterator> InstrListForMBB;
766 
767  for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
768  // Keep track of where this instruction is in the module.
769  switch (TII.getOutliningType(It, Flags)) {
770  case InstrType::Illegal:
771  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
772  UnsignedVecForMBB, InstrListForMBB);
773  break;
774 
775  case InstrType::Legal:
776  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
777  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
778  break;
779 
781  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
782  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
783  // The instruction also acts as a terminator, so we have to record that
784  // in the string.
785  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
786  InstrListForMBB);
787  break;
788 
790  // Normally this is set by mapTo(Blah)Unsigned, but we just want to
791  // skip this instruction. So, unset the flag here.
792  AddedIllegalLastTime = false;
793  break;
794  }
795  }
796 
797  // Are there enough legal instructions in the block for outlining to be
798  // possible?
799  if (HaveLegalRange) {
800  // After we're done every insertion, uniquely terminate this part of the
801  // "string". This makes sure we won't match across basic block or function
802  // boundaries since the "end" is encoded uniquely and thus appears in no
803  // repeated substring.
804  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
805  InstrListForMBB);
806  InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
807  InstrListForMBB.end());
808  UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
809  UnsignedVecForMBB.end());
810  }
811  }
812 
813  InstructionMapper() {
814  // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
815  // changed.
816  assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
817  "DenseMapInfo<unsigned>'s empty key isn't -1!");
819  "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
820  }
821 };
822 
823 /// An interprocedural pass which finds repeated sequences of
824 /// instructions and replaces them with calls to functions.
825 ///
826 /// Each instruction is mapped to an unsigned integer and placed in a string.
827 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
828 /// is then repeatedly queried for repeated sequences of instructions. Each
829 /// non-overlapping repeated sequence is then placed in its own
830 /// \p MachineFunction and each instance is then replaced with a call to that
831 /// function.
832 struct MachineOutliner : public ModulePass {
833 
834  static char ID;
835 
836  /// Set to true if the outliner should consider functions with
837  /// linkonceodr linkage.
838  bool OutlineFromLinkOnceODRs = false;
839 
840  /// Set to true if the outliner should run on all functions in the module
841  /// considered safe for outlining.
842  /// Set to true by default for compatibility with llc's -run-pass option.
843  /// Set when the pass is constructed in TargetPassConfig.
844  bool RunOnAllFunctions = true;
845 
846  StringRef getPassName() const override { return "Machine Outliner"; }
847 
848  void getAnalysisUsage(AnalysisUsage &AU) const override {
851  AU.setPreservesAll();
853  }
854 
855  MachineOutliner() : ModulePass(ID) {
857  }
858 
859  /// Remark output explaining that not outlining a set of candidates would be
860  /// better than outlining that set.
861  void emitNotOutliningCheaperRemark(
862  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
863  OutlinedFunction &OF);
864 
865  /// Remark output explaining that a function was outlined.
866  void emitOutlinedFunctionRemark(OutlinedFunction &OF);
867 
868  /// Find all repeated substrings that satisfy the outlining cost model by
869  /// constructing a suffix tree.
870  ///
871  /// If a substring appears at least twice, then it must be represented by
872  /// an internal node which appears in at least two suffixes. Each suffix
873  /// is represented by a leaf node. To do this, we visit each internal node
874  /// in the tree, using the leaf children of each internal node. If an
875  /// internal node represents a beneficial substring, then we use each of
876  /// its leaf children to find the locations of its substring.
877  ///
878  /// \param Mapper Contains outlining mapping information.
879  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
880  /// each type of candidate.
881  void findCandidates(InstructionMapper &Mapper,
882  std::vector<OutlinedFunction> &FunctionList);
883 
884  /// Replace the sequences of instructions represented by \p OutlinedFunctions
885  /// with calls to functions.
886  ///
887  /// \param M The module we are outlining from.
888  /// \param FunctionList A list of functions to be inserted into the module.
889  /// \param Mapper Contains the instruction mappings for the module.
890  bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
891  InstructionMapper &Mapper);
892 
893  /// Creates a function for \p OF and inserts it into the module.
894  MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
895  InstructionMapper &Mapper,
896  unsigned Name);
897 
898  /// Construct a suffix tree on the instructions in \p M and outline repeated
899  /// strings from that tree.
900  bool runOnModule(Module &M) override;
901 
902  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
903  /// function for remark emission.
904  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
905  DISubprogram *SP;
906  for (const Candidate &C : OF.Candidates)
907  if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
908  return SP;
909  return nullptr;
910  }
911 
912  /// Populate and \p InstructionMapper with instruction-to-integer mappings.
913  /// These are used to construct a suffix tree.
914  void populateMapper(InstructionMapper &Mapper, Module &M,
915  MachineModuleInfo &MMI);
916 
917  /// Initialize information necessary to output a size remark.
918  /// FIXME: This should be handled by the pass manager, not the outliner.
919  /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
920  /// pass manager.
921  void initSizeRemarkInfo(
922  const Module &M, const MachineModuleInfo &MMI,
923  StringMap<unsigned> &FunctionToInstrCount);
924 
925  /// Emit the remark.
926  // FIXME: This should be handled by the pass manager, not the outliner.
927  void emitInstrCountChangedRemark(
928  const Module &M, const MachineModuleInfo &MMI,
929  const StringMap<unsigned> &FunctionToInstrCount);
930 };
931 } // Anonymous namespace.
932 
933 char MachineOutliner::ID = 0;
934 
935 namespace llvm {
936 ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
937  MachineOutliner *OL = new MachineOutliner();
938  OL->RunOnAllFunctions = RunOnAllFunctions;
939  return OL;
940 }
941 
942 } // namespace llvm
943 
944 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
945  false)
946 
947 void MachineOutliner::emitNotOutliningCheaperRemark(
948  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
949  OutlinedFunction &OF) {
950  // FIXME: Right now, we arbitrarily choose some Candidate from the
951  // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
952  // We should probably sort these by function name or something to make sure
953  // the remarks are stable.
954  Candidate &C = CandidatesForRepeatedSeq.front();
955  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
956  MORE.emit([&]() {
957  MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
958  C.front()->getDebugLoc(), C.getMBB());
959  R << "Did not outline " << NV("Length", StringLen) << " instructions"
960  << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
961  << " locations."
962  << " Bytes from outlining all occurrences ("
963  << NV("OutliningCost", OF.getOutliningCost()) << ")"
964  << " >= Unoutlined instruction bytes ("
965  << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
966  << " (Also found at: ";
967 
968  // Tell the user the other places the candidate was found.
969  for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
970  R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
971  CandidatesForRepeatedSeq[i].front()->getDebugLoc());
972  if (i != e - 1)
973  R << ", ";
974  }
975 
976  R << ")";
977  return R;
978  });
979 }
980 
981 void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
982  MachineBasicBlock *MBB = &*OF.MF->begin();
983  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
984  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
985  MBB->findDebugLoc(MBB->begin()), MBB);
986  R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
987  << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
988  << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
989  << " locations. "
990  << "(Found at: ";
991 
992  // Tell the user the other places the candidate was found.
993  for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
994 
995  R << NV((Twine("StartLoc") + Twine(i)).str(),
996  OF.Candidates[i].front()->getDebugLoc());
997  if (i != e - 1)
998  R << ", ";
999  }
1000 
1001  R << ")";
1002 
1003  MORE.emit(R);
1004 }
1005 
1006 void
1007 MachineOutliner::findCandidates(InstructionMapper &Mapper,
1008  std::vector<OutlinedFunction> &FunctionList) {
1009  FunctionList.clear();
1010  SuffixTree ST(Mapper.UnsignedVec);
1011 
1012  // First, find dall of the repeated substrings in the tree of minimum length
1013  // 2.
1014  std::vector<Candidate> CandidatesForRepeatedSeq;
1015  for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
1016  CandidatesForRepeatedSeq.clear();
1017  SuffixTree::RepeatedSubstring RS = *It;
1018  unsigned StringLen = RS.Length;
1019  for (const unsigned &StartIdx : RS.StartIndices) {
1020  unsigned EndIdx = StartIdx + StringLen - 1;
1021  // Trick: Discard some candidates that would be incompatible with the
1022  // ones we've already found for this sequence. This will save us some
1023  // work in candidate selection.
1024  //
1025  // If two candidates overlap, then we can't outline them both. This
1026  // happens when we have candidates that look like, say
1027  //
1028  // AA (where each "A" is an instruction).
1029  //
1030  // We might have some portion of the module that looks like this:
1031  // AAAAAA (6 A's)
1032  //
1033  // In this case, there are 5 different copies of "AA" in this range, but
1034  // at most 3 can be outlined. If only outlining 3 of these is going to
1035  // be unbeneficial, then we ought to not bother.
1036  //
1037  // Note that two things DON'T overlap when they look like this:
1038  // start1...end1 .... start2...end2
1039  // That is, one must either
1040  // * End before the other starts
1041  // * Start after the other ends
1042  if (std::all_of(
1043  CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
1044  [&StartIdx, &EndIdx](const Candidate &C) {
1045  return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
1046  })) {
1047  // It doesn't overlap with anything, so we can outline it.
1048  // Each sequence is over [StartIt, EndIt].
1049  // Save the candidate and its location.
1050 
1051  MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
1052  MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
1053  MachineBasicBlock *MBB = StartIt->getParent();
1054 
1055  CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
1056  EndIt, MBB, FunctionList.size(),
1057  Mapper.MBBFlagsMap[MBB]);
1058  }
1059  }
1060 
1061  // We've found something we might want to outline.
1062  // Create an OutlinedFunction to store it and check if it'd be beneficial
1063  // to outline.
1064  if (CandidatesForRepeatedSeq.size() < 2)
1065  continue;
1066 
1067  // Arbitrarily choose a TII from the first candidate.
1068  // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
1069  const TargetInstrInfo *TII =
1070  CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
1071 
1072  OutlinedFunction OF =
1073  TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
1074 
1075  // If we deleted too many candidates, then there's nothing worth outlining.
1076  // FIXME: This should take target-specified instruction sizes into account.
1077  if (OF.Candidates.size() < 2)
1078  continue;
1079 
1080  // Is it better to outline this candidate than not?
1081  if (OF.getBenefit() < 1) {
1082  emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
1083  continue;
1084  }
1085 
1086  FunctionList.push_back(OF);
1087  }
1088 }
1089 
1091 MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
1092  InstructionMapper &Mapper,
1093  unsigned Name) {
1094 
1095  // Create the function name. This should be unique.
1096  // FIXME: We should have a better naming scheme. This should be stable,
1097  // regardless of changes to the outliner's cost model/traversal order.
1098  std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
1099 
1100  // Create the function using an IR-level function.
1101  LLVMContext &C = M.getContext();
1103  Function::ExternalLinkage, FunctionName, M);
1104 
1105  // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1106  // which gives us better results when we outline from linkonceodr functions.
1109 
1110  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
1111  // necessary.
1112 
1113  // Set optsize/minsize, so we don't insert padding between outlined
1114  // functions.
1115  F->addFnAttr(Attribute::OptimizeForSize);
1116  F->addFnAttr(Attribute::MinSize);
1117 
1118  // Include target features from an arbitrary candidate for the outlined
1119  // function. This makes sure the outlined function knows what kinds of
1120  // instructions are going into it. This is fine, since all parent functions
1121  // must necessarily support the instructions that are in the outlined region.
1122  Candidate &FirstCand = OF.Candidates.front();
1123  const Function &ParentFn = FirstCand.getMF()->getFunction();
1124  if (ParentFn.hasFnAttribute("target-features"))
1125  F->addFnAttr(ParentFn.getFnAttribute("target-features"));
1126 
1127  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1128  IRBuilder<> Builder(EntryBB);
1129  Builder.CreateRetVoid();
1130 
1131  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1133  MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1134  const TargetSubtargetInfo &STI = MF.getSubtarget();
1135  const TargetInstrInfo &TII = *STI.getInstrInfo();
1136 
1137  // Insert the new function into the module.
1138  MF.insert(MF.begin(), &MBB);
1139 
1140  for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
1141  ++I) {
1142  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1143  NewMI->dropMemRefs(MF);
1144 
1145  // Don't keep debug information for outlined instructions.
1146  NewMI->setDebugLoc(DebugLoc());
1147  MBB.insert(MBB.end(), NewMI);
1148  }
1149 
1150  TII.buildOutlinedFrame(MBB, MF, OF);
1151 
1152  // Outlined functions shouldn't preserve liveness.
1153  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
1154  MF.getRegInfo().freezeReservedRegs(MF);
1155 
1156  // If there's a DISubprogram associated with this outlined function, then
1157  // emit debug info for the outlined function.
1158  if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1159  // We have a DISubprogram. Get its DICompileUnit.
1160  DICompileUnit *CU = SP->getUnit();
1161  DIBuilder DB(M, true, CU);
1162  DIFile *Unit = SP->getFile();
1163  Mangler Mg;
1164  // Get the mangled name of the function for the linkage name.
1165  std::string Dummy;
1166  llvm::raw_string_ostream MangledNameStream(Dummy);
1167  Mg.getNameWithPrefix(MangledNameStream, F, false);
1168 
1169  DISubprogram *OutlinedSP = DB.createFunction(
1170  Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
1171  Unit /* File */,
1172  0 /* Line 0 is reserved for compiler-generated code. */,
1173  DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
1174  0, /* Line 0 is reserved for compiler-generated code. */
1175  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1176  /* Outlined code is optimized code by definition. */
1177  DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1178 
1179  // Don't add any new variables to the subprogram.
1180  DB.finalizeSubprogram(OutlinedSP);
1181 
1182  // Attach subprogram to the function.
1183  F->setSubprogram(OutlinedSP);
1184  // We're done with the DIBuilder.
1185  DB.finalize();
1186  }
1187 
1188  return &MF;
1189 }
1190 
1191 bool MachineOutliner::outline(Module &M,
1192  std::vector<OutlinedFunction> &FunctionList,
1193  InstructionMapper &Mapper) {
1194 
1195  bool OutlinedSomething = false;
1196 
1197  // Number to append to the current outlined function.
1198  unsigned OutlinedFunctionNum = 0;
1199 
1200  // Sort by benefit. The most beneficial functions should be outlined first.
1201  llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
1202  const OutlinedFunction &RHS) {
1203  return LHS.getBenefit() > RHS.getBenefit();
1204  });
1205 
1206  // Walk over each function, outlining them as we go along. Functions are
1207  // outlined greedily, based off the sort above.
1208  for (OutlinedFunction &OF : FunctionList) {
1209  // If we outlined something that overlapped with a candidate in a previous
1210  // step, then we can't outline from it.
1211  erase_if(OF.Candidates, [&Mapper](Candidate &C) {
1212  return std::any_of(
1213  Mapper.UnsignedVec.begin() + C.getStartIdx(),
1214  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1215  [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
1216  });
1217 
1218  // If we made it unbeneficial to outline this function, skip it.
1219  if (OF.getBenefit() < 1)
1220  continue;
1221 
1222  // It's beneficial. Create the function and outline its sequence's
1223  // occurrences.
1224  OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
1225  emitOutlinedFunctionRemark(OF);
1226  FunctionsCreated++;
1227  OutlinedFunctionNum++; // Created a function, move to the next name.
1228  MachineFunction *MF = OF.MF;
1229  const TargetSubtargetInfo &STI = MF->getSubtarget();
1230  const TargetInstrInfo &TII = *STI.getInstrInfo();
1231 
1232  // Replace occurrences of the sequence with calls to the new function.
1233  for (Candidate &C : OF.Candidates) {
1234  MachineBasicBlock &MBB = *C.getMBB();
1235  MachineBasicBlock::iterator StartIt = C.front();
1236  MachineBasicBlock::iterator EndIt = C.back();
1237 
1238  // Insert the call.
1239  auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
1240 
1241  // If the caller tracks liveness, then we need to make sure that
1242  // anything we outline doesn't break liveness assumptions. The outlined
1243  // functions themselves currently don't track liveness, but we should
1244  // make sure that the ranges we yank things out of aren't wrong.
1245  if (MBB.getParent()->getProperties().hasProperty(
1247  // Helper lambda for adding implicit def operands to the call
1248  // instruction. It also updates call site information for moved
1249  // code.
1250  auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
1251  for (MachineOperand &MOP : MI.operands()) {
1252  // Skip over anything that isn't a register.
1253  if (!MOP.isReg())
1254  continue;
1255 
1256  // If it's a def, add it to the call instruction.
1257  if (MOP.isDef())
1258  CallInst->addOperand(MachineOperand::CreateReg(
1259  MOP.getReg(), true, /* isDef = true */
1260  true /* isImp = true */));
1261  }
1262  if (MI.isCall())
1263  MI.getMF()->updateCallSiteInfo(&MI);
1264  };
1265  // Copy over the defs in the outlined range.
1266  // First inst in outlined range <-- Anything that's defined in this
1267  // ... .. range has to be added as an
1268  // implicit Last inst in outlined range <-- def to the call
1269  // instruction. Also remove call site information for outlined block
1270  // of code.
1271  std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
1272  }
1273 
1274  // Erase from the point after where the call was inserted up to, and
1275  // including, the final instruction in the sequence.
1276  // Erase needs one past the end, so we need std::next there too.
1277  MBB.erase(std::next(StartIt), std::next(EndIt));
1278 
1279  // Keep track of what we removed by marking them all as -1.
1280  std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
1281  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1282  [](unsigned &I) { I = static_cast<unsigned>(-1); });
1283  OutlinedSomething = true;
1284 
1285  // Statistics.
1286  NumOutlined++;
1287  }
1288  }
1289 
1290  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1291 
1292  return OutlinedSomething;
1293 }
1294 
1295 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
1296  MachineModuleInfo &MMI) {
1297  // Build instruction mappings for each function in the module. Start by
1298  // iterating over each Function in M.
1299  for (Function &F : M) {
1300 
1301  // If there's nothing in F, then there's no reason to try and outline from
1302  // it.
1303  if (F.empty())
1304  continue;
1305 
1306  // There's something in F. Check if it has a MachineFunction associated with
1307  // it.
1309 
1310  // If it doesn't, then there's nothing to outline from. Move to the next
1311  // Function.
1312  if (!MF)
1313  continue;
1314 
1315  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1316 
1317  if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
1318  continue;
1319 
1320  // We have a MachineFunction. Ask the target if it's suitable for outlining.
1321  // If it isn't, then move on to the next Function in the module.
1322  if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
1323  continue;
1324 
1325  // We have a function suitable for outlining. Iterate over every
1326  // MachineBasicBlock in MF and try to map its instructions to a list of
1327  // unsigned integers.
1328  for (MachineBasicBlock &MBB : *MF) {
1329  // If there isn't anything in MBB, then there's no point in outlining from
1330  // it.
1331  // If there are fewer than 2 instructions in the MBB, then it can't ever
1332  // contain something worth outlining.
1333  // FIXME: This should be based off of the maximum size in B of an outlined
1334  // call versus the size in B of the MBB.
1335  if (MBB.empty() || MBB.size() < 2)
1336  continue;
1337 
1338  // Check if MBB could be the target of an indirect branch. If it is, then
1339  // we don't want to outline from it.
1340  if (MBB.hasAddressTaken())
1341  continue;
1342 
1343  // MBB is suitable for outlining. Map it to a list of unsigneds.
1344  Mapper.convertToUnsignedVec(MBB, *TII);
1345  }
1346  }
1347 }
1348 
1349 void MachineOutliner::initSizeRemarkInfo(
1350  const Module &M, const MachineModuleInfo &MMI,
1351  StringMap<unsigned> &FunctionToInstrCount) {
1352  // Collect instruction counts for every function. We'll use this to emit
1353  // per-function size remarks later.
1354  for (const Function &F : M) {
1356 
1357  // We only care about MI counts here. If there's no MachineFunction at this
1358  // point, then there won't be after the outliner runs, so let's move on.
1359  if (!MF)
1360  continue;
1361  FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1362  }
1363 }
1364 
1365 void MachineOutliner::emitInstrCountChangedRemark(
1366  const Module &M, const MachineModuleInfo &MMI,
1367  const StringMap<unsigned> &FunctionToInstrCount) {
1368  // Iterate over each function in the module and emit remarks.
1369  // Note that we won't miss anything by doing this, because the outliner never
1370  // deletes functions.
1371  for (const Function &F : M) {
1373 
1374  // The outliner never deletes functions. If we don't have a MF here, then we
1375  // didn't have one prior to outlining either.
1376  if (!MF)
1377  continue;
1378 
1379  std::string Fname = F.getName();
1380  unsigned FnCountAfter = MF->getInstructionCount();
1381  unsigned FnCountBefore = 0;
1382 
1383  // Check if the function was recorded before.
1384  auto It = FunctionToInstrCount.find(Fname);
1385 
1386  // Did we have a previously-recorded size? If yes, then set FnCountBefore
1387  // to that.
1388  if (It != FunctionToInstrCount.end())
1389  FnCountBefore = It->second;
1390 
1391  // Compute the delta and emit a remark if there was a change.
1392  int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1393  static_cast<int64_t>(FnCountBefore);
1394  if (FnDelta == 0)
1395  continue;
1396 
1397  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1398  MORE.emit([&]() {
1399  MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1401  &MF->front());
1402  R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1403  << ": Function: "
1404  << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1405  << ": MI instruction count changed from "
1406  << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1407  FnCountBefore)
1408  << " to "
1409  << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1410  FnCountAfter)
1411  << "; Delta: "
1412  << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1413  return R;
1414  });
1415  }
1416 }
1417 
1418 bool MachineOutliner::runOnModule(Module &M) {
1419  // Check if there's anything in the module. If it's empty, then there's
1420  // nothing to outline.
1421  if (M.empty())
1422  return false;
1423 
1424  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1425 
1426  // If the user passed -enable-machine-outliner=always or
1427  // -enable-machine-outliner, the pass will run on all functions in the module.
1428  // Otherwise, if the target supports default outlining, it will run on all
1429  // functions deemed by the target to be worth outlining from by default. Tell
1430  // the user how the outliner is running.
1431  LLVM_DEBUG(
1432  dbgs() << "Machine Outliner: Running on ";
1433  if (RunOnAllFunctions)
1434  dbgs() << "all functions";
1435  else
1436  dbgs() << "target-default functions";
1437  dbgs() << "\n"
1438  );
1439 
1440  // If the user specifies that they want to outline from linkonceodrs, set
1441  // it here.
1442  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1443  InstructionMapper Mapper;
1444 
1445  // Prepare instruction mappings for the suffix tree.
1446  populateMapper(Mapper, M, MMI);
1447  std::vector<OutlinedFunction> FunctionList;
1448 
1449  // Find all of the outlining candidates.
1450  findCandidates(Mapper, FunctionList);
1451 
1452  // If we've requested size remarks, then collect the MI counts of every
1453  // function before outlining, and the MI counts after outlining.
1454  // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1455  // the pass manager's responsibility.
1456  // This could pretty easily be placed in outline instead, but because we
1457  // really ultimately *don't* want this here, it's done like this for now
1458  // instead.
1459 
1460  // Check if we want size remarks.
1461  bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1462  StringMap<unsigned> FunctionToInstrCount;
1463  if (ShouldEmitSizeRemarks)
1464  initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
1465 
1466  // Outline each of the candidates and return true if something was outlined.
1467  bool OutlinedSomething = outline(M, FunctionList, Mapper);
1468 
1469  // If we outlined something, we definitely changed the MI count of the
1470  // module. If we've asked for size remarks, then output them.
1471  // FIXME: This should be in the pass manager.
1472  if (ShouldEmitSizeRemarks && OutlinedSomething)
1473  emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
1474 
1475  return OutlinedSomething;
1476 }
void finalize()
Construct any deferred debug info descriptors.
Definition: DIBuilder.cpp:68
const Function & getFunction() const
Definition: Function.h:133
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
unsigned getInstructionCount() const
Return the number of MachineInstrs in this MachineFunction.
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:651
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
DiagnosticInfoOptimizationBase::Argument NV
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
const MachineFunctionProperties & getProperties() const
Get the function properties.
DIFile * getFile() const
Diagnostic information for applied optimization remarks.
This class represents a function call, abstracting a target machine&#39;s calling convention.
iterator find(StringRef Key)
Definition: StringMap.h:332
Externally visible function.
Definition: GlobalValue.h:48
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
F(f)
DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine, DINode::DIFlags Flags=DINode::FlagZero, DISubprogram::DISPFlags SPFlags=DISubprogram::SPFlagZero, DITemplateParameterArray TParams=nullptr, DISubprogram *Decl=nullptr, DITypeArray ThrownTypes=nullptr)
Create a new descriptor for the specified subprogram.
Definition: DIBuilder.cpp:751
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
AnalysisUsage & addRequired()
Definition: BitVector.h:937
virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const
Insert a call to an outlined function into the program.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
const HexagonInstrInfo * TII
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
DISubroutineType * createSubroutineType(DITypeRefArray ParameterTypes, DINode::DIFlags Flags=DINode::FlagZero, unsigned CC=0)
Create subroutine type.
Definition: DIBuilder.cpp:497
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2099
Diagnostic information for optimization analysis remarks.
Subprogram description.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:96
bool empty() const
Definition: Module.h:607
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr&#39;s memory reference descriptor list.
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const
Return true if the function can safely be outlined from.
void updateCallSiteInfo(const MachineInstr *Old, const MachineInstr *New=nullptr)
Update call sites info by deleting entry for Old call instruction.
TargetInstrInfo - Interface to description of machine instruction set.
DITypeRefArray getOrCreateTypeArray(ArrayRef< Metadata *> Elements)
Get a DITypeRefArray, create one if required.
Definition: DIBuilder.cpp:611
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const
Return true if the function should be outlined from by default.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void setSubprogram(DISubprogram *SP)
Set the attached subprogram.
Definition: Metadata.cpp:1500
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:135
bool isLeaf(ID id)
Returns true if the intrinsic is a leaf, i.e.
Definition: Function.cpp:1062
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
bool shouldEmitInstrCountChangedRemark()
Return true if size-info optimization remark is enabled, false otherwise.
Definition: Module.h:263
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const
Returns how or if MI should be outlined.
Contains all data structures shared between the outliner implemented in MachineOutliner.cpp and target implementations of the outliner.
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const
Optional target hook that returns true if MBB is safe to outline from, and returns any target-specifi...
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:296
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
Definition: Allocator.h:490
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Used in the streaming interface as the general argument type.
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
const MachineBasicBlock & front() const
MachineFunction & getOrCreateMachineFunction(const Function &F)
Returns the MachineFunction constructed for the IR function F.
ModulePass * createMachineOutlinerPass(bool RunOnAllFunctions=true)
This pass performs outlining on machine instructions directly before printing assembly.
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:519
void initializeMachineOutlinerPass(PassRegistry &)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const
Insert a custom frame for outlined functions.
The optimization diagnostic interface.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr...
MachineOperand class - Representation of each machine instruction operand.
#define MORE()
Definition: regcomp.c:252
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:441
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1332
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
void setPreservesAll()
Set by analyses that do not transform their input at all.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1977
Representation of each machine instruction.
Definition: MachineInstr.h:64
ReturnInst * CreateRetVoid()
Create a &#39;ret void&#39; instruction.
Definition: IRBuilder.h:860
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:219
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
void finalizeSubprogram(DISubprogram *SP)
Finalize a specific subprogram - no new variables may be added to this subprogram afterwards...
Definition: DIBuilder.cpp:48
Diagnostic information for missed-optimization remarks.
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasProperty(Property P) const
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable&#39;s name.
Definition: Mangler.cpp:111
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:229
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1975
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1158
static cl::opt< bool > EnableLinkOnceODROutlining("enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false))
#define LLVM_DEBUG(X)
Definition: Debug.h:122
virtual outliner::OutlinedFunction getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const
Returns a outliner::OutlinedFunction struct containing target-specific information for a set of outli...
#define DEBUG_TYPE
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
iterator end()
Definition: StringMap.h:317
This class contains meta information specific to a module.