LLVM  14.0.0git
IROutliner.cpp
Go to the documentation of this file.
1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
18 #include "llvm/IR/Attributes.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Pass.h"
27 #include "llvm/Transforms/IPO.h"
28 #include <map>
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
39 namespace llvm {
41 } // namespace llvm
42 
43 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
44 // functions. This is false by default because the linker can dedupe linkonceodr
45 // functions. Since the outliner is confined to a single module (modulo LTO),
46 // this is off by default. It should, however, be the default behavior in
47 // LTO.
49  "enable-linkonceodr-ir-outlining", cl::Hidden,
50  cl::desc("Enable the IR outliner on linkonceodr functions"),
51  cl::init(false));
52 
53 // This is a debug option to test small pieces of code to ensure that outlining
54 // works correctly.
56  "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
57  cl::desc("Debug option to outline greedily, without restriction that "
58  "calculated benefit outweighs cost"));
59 
60 /// The OutlinableGroup holds all the overarching information for outlining
61 /// a set of regions that are structurally similar to one another, such as the
62 /// types of the overall function, the output blocks, the sets of stores needed
63 /// and a list of the different regions. This information is used in the
64 /// deduplication of extracted regions with the same structure.
66  /// The sections that could be outlined
67  std::vector<OutlinableRegion *> Regions;
68 
69  /// The argument types for the function created as the overall function to
70  /// replace the extracted function for each region.
71  std::vector<Type *> ArgumentTypes;
72  /// The FunctionType for the overall function.
73  FunctionType *OutlinedFunctionType = nullptr;
74  /// The Function for the collective overall function.
75  Function *OutlinedFunction = nullptr;
76 
77  /// Flag for whether we should not consider this group of OutlinableRegions
78  /// for extraction.
79  bool IgnoreGroup = false;
80 
81  /// The return blocks for the overall function.
83 
84  /// The PHIBlocks with their corresponding return block based on the return
85  /// value as the key.
87 
88  /// A set containing the different GVN store sets needed. Each array contains
89  /// a sorted list of the different values that need to be stored into output
90  /// registers.
92 
93  /// Flag for whether the \ref ArgumentTypes have been defined after the
94  /// extraction of the first region.
95  bool InputTypesSet = false;
96 
97  /// The number of input values in \ref ArgumentTypes. Anything after this
98  /// index in ArgumentTypes is an output argument.
99  unsigned NumAggregateInputs = 0;
100 
101  /// The mapping of the canonical numbering of the values in outlined sections
102  /// to specific arguments.
104 
105  /// The number of branches in the region target a basic block that is outside
106  /// of the region.
107  unsigned BranchesToOutside = 0;
108 
109  /// The number of instructions that will be outlined by extracting \ref
110  /// Regions.
111  InstructionCost Benefit = 0;
112  /// The number of added instructions needed for the outlining of the \ref
113  /// Regions.
114  InstructionCost Cost = 0;
115 
116  /// The argument that needs to be marked with the swifterr attribute. If not
117  /// needed, there is no value.
119 
120  /// For the \ref Regions, we look at every Value. If it is a constant,
121  /// we check whether it is the same in Region.
122  ///
123  /// \param [in,out] NotSame contains the global value numbers where the
124  /// constant is not always the same, and must be passed in as an argument.
125  void findSameConstants(DenseSet<unsigned> &NotSame);
126 
127  /// For the regions, look at each set of GVN stores needed and account for
128  /// each combination. Add an argument to the argument types if there is
129  /// more than one combination.
130  ///
131  /// \param [in] M - The module we are outlining from.
132  void collectGVNStoreSets(Module &M);
133 };
134 
135 /// Move the contents of \p SourceBB to before the last instruction of \p
136 /// TargetBB.
137 /// \param SourceBB - the BasicBlock to pull Instructions from.
138 /// \param TargetBB - the BasicBlock to put Instruction into.
139 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
140  for (Instruction &I : llvm::make_early_inc_range(SourceBB))
141  I.moveBefore(TargetBB, TargetBB.end());
142 }
143 
144 /// A function to sort the keys of \p Map, which must be a mapping of constant
145 /// values to basic blocks and return it in \p SortedKeys
146 ///
147 /// \param SortedKeys - The vector the keys will be return in and sorted.
148 /// \param Map - The DenseMap containing keys to sort.
149 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
151  for (auto &VtoBB : Map)
152  SortedKeys.push_back(VtoBB.first);
153 
154  stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
155  const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
156  const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
157  assert(RHSC && "Not a constant integer in return value?");
158  assert(LHSC && "Not a constant integer in return value?");
159 
160  return LHSC->getLimitedValue() < RHSC->getLimitedValue();
161  });
162 }
163 
165  Value *V) {
166  Optional<unsigned> GVN = Candidate->getGVN(V);
167  assert(GVN.hasValue() && "No GVN for incoming value");
168  Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
169  Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
170  Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
171  return FoundValueOpt.getValueOr(nullptr);
172 }
173 
175  assert(!CandidateSplit && "Candidate already split!");
176 
177  Instruction *BackInst = Candidate->backInstruction();
178 
179  Instruction *EndInst = nullptr;
180  // Check whether the last instruction is a terminator, if it is, we do
181  // not split on the following instruction. We leave the block as it is. We
182  // also check that this is not the last instruction in the Module, otherwise
183  // the check for whether the current following instruction matches the
184  // previously recorded instruction will be incorrect.
185  if (!BackInst->isTerminator() ||
186  BackInst->getParent() != &BackInst->getFunction()->back()) {
187  EndInst = Candidate->end()->Inst;
188  assert(EndInst && "Expected an end instruction?");
189  }
190 
191  // We check if the current instruction following the last instruction in the
192  // region is the same as the recorded instruction following the last
193  // instruction. If they do not match, there could be problems in rewriting
194  // the program after outlining, so we ignore it.
195  if (!BackInst->isTerminator() &&
196  EndInst != BackInst->getNextNonDebugInstruction())
197  return;
198 
199  Instruction *StartInst = (*Candidate->begin()).Inst;
200  assert(StartInst && "Expected a start instruction?");
201  StartBB = StartInst->getParent();
202  PrevBB = StartBB;
203 
204  // The basic block gets split like so:
205  // block: block:
206  // inst1 inst1
207  // inst2 inst2
208  // region1 br block_to_outline
209  // region2 block_to_outline:
210  // region3 -> region1
211  // region4 region2
212  // inst3 region3
213  // inst4 region4
214  // br block_after_outline
215  // block_after_outline:
216  // inst3
217  // inst4
218 
219  std::string OriginalName = PrevBB->getName().str();
220 
221  StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
222  PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
223 
224  CandidateSplit = true;
225  if (!BackInst->isTerminator()) {
226  EndBB = EndInst->getParent();
227  FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
228  EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
229  FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
230  return;
231  }
232 
233  EndBB = BackInst->getParent();
234  EndsInBranch = true;
235  FollowBB = nullptr;
236 }
237 
239  assert(CandidateSplit && "Candidate is not split!");
240 
241  // The basic block gets reattached like so:
242  // block: block:
243  // inst1 inst1
244  // inst2 inst2
245  // br block_to_outline region1
246  // block_to_outline: -> region2
247  // region1 region3
248  // region2 region4
249  // region3 inst3
250  // region4 inst4
251  // br block_after_outline
252  // block_after_outline:
253  // inst3
254  // inst4
255  assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
256 
257  // StartBB should only have one predecessor since we put an unconditional
258  // branch at the end of PrevBB when we split the BasicBlock.
259  PrevBB = StartBB->getSinglePredecessor();
260  assert(PrevBB != nullptr &&
261  "No Predecessor for the region start basic block!");
262 
263  assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
264  PrevBB->getTerminator()->eraseFromParent();
265 
266  moveBBContents(*StartBB, *PrevBB);
267 
268  BasicBlock *PlacementBB = PrevBB;
269  if (StartBB != EndBB)
270  PlacementBB = EndBB;
271  if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
272  assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
273  assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
274  PlacementBB->getTerminator()->eraseFromParent();
275  moveBBContents(*FollowBB, *PlacementBB);
276  PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
277  FollowBB->eraseFromParent();
278  }
279 
280  PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
281  StartBB->eraseFromParent();
282 
283  // Make sure to save changes back to the StartBB.
284  StartBB = PrevBB;
285  EndBB = nullptr;
286  PrevBB = nullptr;
287  FollowBB = nullptr;
288 
289  CandidateSplit = false;
290 }
291 
292 /// Find whether \p V matches the Constants previously found for the \p GVN.
293 ///
294 /// \param V - The value to check for consistency.
295 /// \param GVN - The global value number assigned to \p V.
296 /// \param GVNToConstant - The mapping of global value number to Constants.
297 /// \returns true if the Value matches the Constant mapped to by V and false if
298 /// it \p V is a Constant but does not match.
299 /// \returns None if \p V is not a Constant.
300 static Optional<bool>
301 constantMatches(Value *V, unsigned GVN,
302  DenseMap<unsigned, Constant *> &GVNToConstant) {
303  // See if we have a constants
304  Constant *CST = dyn_cast<Constant>(V);
305  if (!CST)
306  return None;
307 
308  // Holds a mapping from a global value number to a Constant.
310  bool Inserted;
311 
312 
313  // If we have a constant, try to make a new entry in the GVNToConstant.
314  std::tie(GVNToConstantIt, Inserted) =
315  GVNToConstant.insert(std::make_pair(GVN, CST));
316  // If it was found and is not equal, it is not the same. We do not
317  // handle this case yet, and exit early.
318  if (Inserted || (GVNToConstantIt->second == CST))
319  return true;
320 
321  return false;
322 }
323 
325  InstructionCost Benefit = 0;
326 
327  // Estimate the benefit of outlining a specific sections of the program. We
328  // delegate mostly this task to the TargetTransformInfo so that if the target
329  // has specific changes, we can have a more accurate estimate.
330 
331  // However, getInstructionCost delegates the code size calculation for
332  // arithmetic instructions to getArithmeticInstrCost in
333  // include/Analysis/TargetTransformImpl.h, where it always estimates that the
334  // code size for a division and remainder instruction to be equal to 4, and
335  // everything else to 1. This is not an accurate representation of the
336  // division instruction for targets that have a native division instruction.
337  // To be overly conservative, we only add 1 to the number of instructions for
338  // each division instruction.
339  for (IRInstructionData &ID : *Candidate) {
340  Instruction *I = ID.Inst;
341  switch (I->getOpcode()) {
342  case Instruction::FDiv:
343  case Instruction::FRem:
344  case Instruction::SDiv:
345  case Instruction::SRem:
346  case Instruction::UDiv:
347  case Instruction::URem:
348  Benefit += 1;
349  break;
350  default:
352  break;
353  }
354  }
355 
356  return Benefit;
357 }
358 
359 /// Find whether \p Region matches the global value numbering to Constant
360 /// mapping found so far.
361 ///
362 /// \param Region - The OutlinableRegion we are checking for constants
363 /// \param GVNToConstant - The mapping of global value number to Constants.
364 /// \param NotSame - The set of global value numbers that do not have the same
365 /// constant in each region.
366 /// \returns true if all Constants are the same in every use of a Constant in \p
367 /// Region and false if not
368 static bool
370  DenseMap<unsigned, Constant *> &GVNToConstant,
371  DenseSet<unsigned> &NotSame) {
372  bool ConstantsTheSame = true;
373 
374  IRSimilarityCandidate &C = *Region.Candidate;
375  for (IRInstructionData &ID : C) {
376 
377  // Iterate over the operands in an instruction. If the global value number,
378  // assigned by the IRSimilarityCandidate, has been seen before, we check if
379  // the the number has been found to be not the same value in each instance.
380  for (Value *V : ID.OperVals) {
381  Optional<unsigned> GVNOpt = C.getGVN(V);
382  assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
383  unsigned GVN = GVNOpt.getValue();
384 
385  // Check if this global value has been found to not be the same already.
386  if (NotSame.contains(GVN)) {
387  if (isa<Constant>(V))
388  ConstantsTheSame = false;
389  continue;
390  }
391 
392  // If it has been the same so far, we check the value for if the
393  // associated Constant value match the previous instances of the same
394  // global value number. If the global value does not map to a Constant,
395  // it is considered to not be the same value.
396  Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
397  if (ConstantMatches.hasValue()) {
398  if (ConstantMatches.getValue())
399  continue;
400  else
401  ConstantsTheSame = false;
402  }
403 
404  // While this value is a register, it might not have been previously,
405  // make sure we don't already have a constant mapped to this global value
406  // number.
407  if (GVNToConstant.find(GVN) != GVNToConstant.end())
408  ConstantsTheSame = false;
409 
410  NotSame.insert(GVN);
411  }
412  }
413 
414  return ConstantsTheSame;
415 }
416 
418  DenseMap<unsigned, Constant *> GVNToConstant;
419 
420  for (OutlinableRegion *Region : Regions)
421  collectRegionsConstants(*Region, GVNToConstant, NotSame);
422 }
423 
425  for (OutlinableRegion *OS : Regions)
426  OutputGVNCombinations.insert(OS->GVNStores);
427 
428  // We are adding an extracted argument to decide between which output path
429  // to use in the basic block. It is used in a switch statement and only
430  // needs to be an integer.
431  if (OutputGVNCombinations.size() > 1)
432  ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
433 }
434 
435 /// Get the subprogram if it exists for one of the outlined regions.
436 ///
437 /// \param [in] Group - The set of regions to find a subprogram for.
438 /// \returns the subprogram if it exists, or nullptr.
440  for (OutlinableRegion *OS : Group.Regions)
441  if (Function *F = OS->Call->getFunction())
442  if (DISubprogram *SP = F->getSubprogram())
443  return SP;
444 
445  return nullptr;
446 }
447 
448 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
449  unsigned FunctionNameSuffix) {
450  assert(!Group.OutlinedFunction && "Function is already defined!");
451 
452  Type *RetTy = Type::getVoidTy(M.getContext());
453  // All extracted functions _should_ have the same return type at this point
454  // since the similarity identifier ensures that all branches outside of the
455  // region occur in the same place.
456 
457  // NOTE: Should we ever move to the model that uses a switch at every point
458  // needed, meaning that we could branch within the region or out, it is
459  // possible that we will need to switch to using the most general case all of
460  // the time.
461  for (OutlinableRegion *R : Group.Regions) {
462  Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
463  if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
464  (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
465  RetTy = ExtractedFuncType;
466  }
467 
469  RetTy, Group.ArgumentTypes, false);
470 
471  // These functions will only be called from within the same module, so
472  // we can set an internal linkage.
475  "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
476 
477  // Transfer the swifterr attribute to the correct function parameter.
478  if (Group.SwiftErrorArgument.hasValue())
480  Attribute::SwiftError);
481 
482  Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
483  Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
484 
485  // If there's a DISubprogram associated with this outlined function, then
486  // emit debug info for the outlined function.
487  if (DISubprogram *SP = getSubprogramOrNull(Group)) {
488  Function *F = Group.OutlinedFunction;
489  // We have a DISubprogram. Get its DICompileUnit.
490  DICompileUnit *CU = SP->getUnit();
491  DIBuilder DB(M, true, CU);
492  DIFile *Unit = SP->getFile();
493  Mangler Mg;
494  // Get the mangled name of the function for the linkage name.
495  std::string Dummy;
496  llvm::raw_string_ostream MangledNameStream(Dummy);
497  Mg.getNameWithPrefix(MangledNameStream, F, false);
498 
499  DISubprogram *OutlinedSP = DB.createFunction(
500  Unit /* Context */, F->getName(), MangledNameStream.str(),
501  Unit /* File */,
502  0 /* Line 0 is reserved for compiler-generated code. */,
503  DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
504  0, /* Line 0 is reserved for compiler-generated code. */
505  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
506  /* Outlined code is optimized code by definition. */
507  DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
508 
509  // Don't add any new variables to the subprogram.
510  DB.finalizeSubprogram(OutlinedSP);
511 
512  // Attach subprogram to the function.
513  F->setSubprogram(OutlinedSP);
514  // We're done with the DIBuilder.
515  DB.finalize();
516  }
517 
518  return Group.OutlinedFunction;
519 }
520 
521 /// Move each BasicBlock in \p Old to \p New.
522 ///
523 /// \param [in] Old - The function to move the basic blocks from.
524 /// \param [in] New - The function to move the basic blocks to.
525 /// \param [out] NewEnds - The return blocks of the new overall function.
526 static void moveFunctionData(Function &Old, Function &New,
528  for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
529  CurrBB.removeFromParent();
530  CurrBB.insertInto(&New);
531  Instruction *I = CurrBB.getTerminator();
532 
533  // For each block we find a return instruction is, it is a potential exit
534  // path for the function. We keep track of each block based on the return
535  // value here.
536  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
537  NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
538 
539  std::vector<Instruction *> DebugInsts;
540 
541  for (Instruction &Val : CurrBB) {
542  // We must handle the scoping of called functions differently than
543  // other outlined instructions.
544  if (!isa<CallInst>(&Val)) {
545  // Remove the debug information for outlined functions.
546  Val.setDebugLoc(DebugLoc());
547  continue;
548  }
549 
550  // From this point we are only handling call instructions.
551  CallInst *CI = cast<CallInst>(&Val);
552 
553  // We add any debug statements here, to be removed after. Since the
554  // instructions originate from many different locations in the program,
555  // it will cause incorrect reporting from a debugger if we keep the
556  // same debug instructions.
557  if (isa<DbgInfoIntrinsic>(CI)) {
558  DebugInsts.push_back(&Val);
559  continue;
560  }
561 
562  // Edit the scope of called functions inside of outlined functions.
563  if (DISubprogram *SP = New.getSubprogram()) {
564  DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
565  Val.setDebugLoc(DI);
566  }
567  }
568 
569  for (Instruction *I : DebugInsts)
570  I->eraseFromParent();
571  }
572 
573  assert(NewEnds.size() > 0 && "No return instruction for new function?");
574 }
575 
576 /// Find the the constants that will need to be lifted into arguments
577 /// as they are not the same in each instance of the region.
578 ///
579 /// \param [in] C - The IRSimilarityCandidate containing the region we are
580 /// analyzing.
581 /// \param [in] NotSame - The set of global value numbers that do not have a
582 /// single Constant across all OutlinableRegions similar to \p C.
583 /// \param [out] Inputs - The list containing the global value numbers of the
584 /// arguments needed for the region of code.
586  std::vector<unsigned> &Inputs) {
587  DenseSet<unsigned> Seen;
588  // Iterate over the instructions, and find what constants will need to be
589  // extracted into arguments.
590  for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
591  IDIt != EndIDIt; IDIt++) {
592  for (Value *V : (*IDIt).OperVals) {
593  // Since these are stored before any outlining, they will be in the
594  // global value numbering.
595  unsigned GVN = C.getGVN(V).getValue();
596  if (isa<Constant>(V))
597  if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
598  Inputs.push_back(GVN);
599  Seen.insert(GVN);
600  }
601  }
602  }
603 }
604 
605 /// Find the GVN for the inputs that have been found by the CodeExtractor.
606 ///
607 /// \param [in] C - The IRSimilarityCandidate containing the region we are
608 /// analyzing.
609 /// \param [in] CurrentInputs - The set of inputs found by the
610 /// CodeExtractor.
611 /// \param [in] OutputMappings - The mapping of values that have been replaced
612 /// by a new output value.
613 /// \param [out] EndInputNumbers - The global value numbers for the extracted
614 /// arguments.
616  SetVector<Value *> &CurrentInputs,
617  const DenseMap<Value *, Value *> &OutputMappings,
618  std::vector<unsigned> &EndInputNumbers) {
619  // Get the Global Value Number for each input. We check if the Value has been
620  // replaced by a different value at output, and use the original value before
621  // replacement.
622  for (Value *Input : CurrentInputs) {
623  assert(Input && "Have a nullptr as an input");
624  if (OutputMappings.find(Input) != OutputMappings.end())
625  Input = OutputMappings.find(Input)->second;
626  assert(C.getGVN(Input).hasValue() &&
627  "Could not find a numbering for the given input");
628  EndInputNumbers.push_back(C.getGVN(Input).getValue());
629  }
630 }
631 
632 /// Find the original value for the \p ArgInput values if any one of them was
633 /// replaced during a previous extraction.
634 ///
635 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
636 /// \param [in] OutputMappings - The mapping of values that have been replaced
637 /// by a new output value.
638 /// \param [out] RemappedArgInputs - The remapped values according to
639 /// \p OutputMappings that will be extracted.
640 static void
642  const DenseMap<Value *, Value *> &OutputMappings,
643  SetVector<Value *> &RemappedArgInputs) {
644  // Get the global value number for each input that will be extracted as an
645  // argument by the code extractor, remapping if needed for reloaded values.
646  for (Value *Input : ArgInputs) {
647  if (OutputMappings.find(Input) != OutputMappings.end())
648  Input = OutputMappings.find(Input)->second;
649  RemappedArgInputs.insert(Input);
650  }
651 }
652 
653 /// Find the input GVNs and the output values for a region of Instructions.
654 /// Using the code extractor, we collect the inputs to the extracted function.
655 ///
656 /// The \p Region can be identified as needing to be ignored in this function.
657 /// It should be checked whether it should be ignored after a call to this
658 /// function.
659 ///
660 /// \param [in,out] Region - The region of code to be analyzed.
661 /// \param [out] InputGVNs - The global value numbers for the extracted
662 /// arguments.
663 /// \param [in] NotSame - The global value numbers in the region that do not
664 /// have the same constant value in the regions structurally similar to
665 /// \p Region.
666 /// \param [in] OutputMappings - The mapping of values that have been replaced
667 /// by a new output value after extraction.
668 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
669 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
670 /// as outputs.
672  OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
673  DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
674  SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
675  IRSimilarityCandidate &C = *Region.Candidate;
676 
677  // OverallInputs are the inputs to the region found by the CodeExtractor,
678  // SinkCands and HoistCands are used by the CodeExtractor to find sunken
679  // allocas of values whose lifetimes are contained completely within the
680  // outlined region. PremappedInputs are the arguments found by the
681  // CodeExtractor, removing conditions such as sunken allocas, but that
682  // may need to be remapped due to the extracted output values replacing
683  // the original values. We use DummyOutputs for this first run of finding
684  // inputs and outputs since the outputs could change during findAllocas,
685  // the correct set of extracted outputs will be in the final Outputs ValueSet.
686  SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
687  DummyOutputs;
688 
689  // Use the code extractor to get the inputs and outputs, without sunken
690  // allocas or removing llvm.assumes.
691  CodeExtractor *CE = Region.CE;
692  CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
693  assert(Region.StartBB && "Region must have a start BasicBlock!");
694  Function *OrigF = Region.StartBB->getParent();
695  CodeExtractorAnalysisCache CEAC(*OrigF);
696  BasicBlock *Dummy = nullptr;
697 
698  // The region may be ineligible due to VarArgs in the parent function. In this
699  // case we ignore the region.
700  if (!CE->isEligible()) {
701  Region.IgnoreRegion = true;
702  return;
703  }
704 
705  // Find if any values are going to be sunk into the function when extracted
706  CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
707  CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
708 
709  // TODO: Support regions with sunken allocas: values whose lifetimes are
710  // contained completely within the outlined region. These are not guaranteed
711  // to be the same in every region, so we must elevate them all to arguments
712  // when they appear. If these values are not equal, it means there is some
713  // Input in OverallInputs that was removed for ArgInputs.
714  if (OverallInputs.size() != PremappedInputs.size()) {
715  Region.IgnoreRegion = true;
716  return;
717  }
718 
719  findConstants(C, NotSame, InputGVNs);
720 
721  mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
722 
723  remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
724  ArgInputs);
725 
726  // Sort the GVNs, since we now have constants included in the \ref InputGVNs
727  // we need to make sure they are in a deterministic order.
728  stable_sort(InputGVNs);
729 }
730 
731 /// Look over the inputs and map each input argument to an argument in the
732 /// overall function for the OutlinableRegions. This creates a way to replace
733 /// the arguments of the extracted function with the arguments of the new
734 /// overall function.
735 ///
736 /// \param [in,out] Region - The region of code to be analyzed.
737 /// \param [in] InputGVNs - The global value numbering of the input values
738 /// collected.
739 /// \param [in] ArgInputs - The values of the arguments to the extracted
740 /// function.
741 static void
743  std::vector<unsigned> &InputGVNs,
744  SetVector<Value *> &ArgInputs) {
745 
746  IRSimilarityCandidate &C = *Region.Candidate;
747  OutlinableGroup &Group = *Region.Parent;
748 
749  // This counts the argument number in the overall function.
750  unsigned TypeIndex = 0;
751 
752  // This counts the argument number in the extracted function.
753  unsigned OriginalIndex = 0;
754 
755  // Find the mapping of the extracted arguments to the arguments for the
756  // overall function. Since there may be extra arguments in the overall
757  // function to account for the extracted constants, we have two different
758  // counters as we find extracted arguments, and as we come across overall
759  // arguments.
760 
761  // Additionally, in our first pass, for the first extracted function,
762  // we find argument locations for the canonical value numbering. This
763  // numbering overrides any discovered location for the extracted code.
764  for (unsigned InputVal : InputGVNs) {
765  Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
766  assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
767  unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
768 
769  Optional<Value *> InputOpt = C.fromGVN(InputVal);
770  assert(InputOpt.hasValue() && "Global value number not found?");
771  Value *Input = InputOpt.getValue();
772 
774  Group.CanonicalNumberToAggArg.find(CanonicalNumber);
775 
776  if (!Group.InputTypesSet) {
777  Group.ArgumentTypes.push_back(Input->getType());
778  // If the input value has a swifterr attribute, make sure to mark the
779  // argument in the overall function.
780  if (Input->isSwiftError()) {
781  assert(
782  !Group.SwiftErrorArgument.hasValue() &&
783  "Argument already marked with swifterr for this OutlinableGroup!");
784  Group.SwiftErrorArgument = TypeIndex;
785  }
786  }
787 
788  // Check if we have a constant. If we do add it to the overall argument
789  // number to Constant map for the region, and continue to the next input.
790  if (Constant *CST = dyn_cast<Constant>(Input)) {
791  if (AggArgIt != Group.CanonicalNumberToAggArg.end())
792  Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
793  else {
795  std::make_pair(CanonicalNumber, TypeIndex));
796  Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
797  }
798  TypeIndex++;
799  continue;
800  }
801 
802  // It is not a constant, we create the mapping from extracted argument list
803  // to the overall argument list, using the canonical location, if it exists.
804  assert(ArgInputs.count(Input) && "Input cannot be found!");
805 
806  if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
807  if (OriginalIndex != AggArgIt->second)
808  Region.ChangedArgOrder = true;
809  Region.ExtractedArgToAgg.insert(
810  std::make_pair(OriginalIndex, AggArgIt->second));
811  Region.AggArgToExtracted.insert(
812  std::make_pair(AggArgIt->second, OriginalIndex));
813  } else {
815  std::make_pair(CanonicalNumber, TypeIndex));
816  Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
817  Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
818  }
819  OriginalIndex++;
820  TypeIndex++;
821  }
822 
823  // If the function type definitions for the OutlinableGroup holding the region
824  // have not been set, set the length of the inputs here. We should have the
825  // same inputs for all of the different regions contained in the
826  // OutlinableGroup since they are all structurally similar to one another.
827  if (!Group.InputTypesSet) {
828  Group.NumAggregateInputs = TypeIndex;
829  Group.InputTypesSet = true;
830  }
831 
832  Region.NumExtractedInputs = OriginalIndex;
833 }
834 
835 /// Create a mapping of the output arguments for the \p Region to the output
836 /// arguments of the overall outlined function.
837 ///
838 /// \param [in,out] Region - The region of code to be analyzed.
839 /// \param [in] Outputs - The values found by the code extractor.
840 static void
842  SetVector<Value *> &Outputs) {
843  OutlinableGroup &Group = *Region.Parent;
844  IRSimilarityCandidate &C = *Region.Candidate;
845 
848  C.getBasicBlocks(BBSet, BE);
849 
850  // Find the exits to the region.
852  for (BasicBlock *Block : BE)
853  for (BasicBlock *Succ : successors(Block))
854  if (!BBSet.contains(Succ))
855  Exits.insert(Succ);
856 
857  // After determining which blocks exit to PHINodes, we add these PHINodes to
858  // the set of outputs to be processed. We also check the incoming values of
859  // the PHINodes for whether they should no longer be considered outputs.
860  for (BasicBlock *ExitBB : Exits) {
861  for (PHINode &PN : ExitBB->phis()) {
862  // Find all incoming values from the outlining region.
863  SmallVector<unsigned, 2> IncomingVals;
864  for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
865  if (BBSet.contains(PN.getIncomingBlock(Idx)))
866  IncomingVals.push_back(Idx);
867 
868  // Do not process PHI if there is one (or fewer) predecessor from region.
869  if (IncomingVals.size() <= 1)
870  continue;
871 
872  Region.IgnoreRegion = true;
873  return;
874  }
875  }
876 
877  // This counts the argument number in the extracted function.
878  unsigned OriginalIndex = Region.NumExtractedInputs;
879 
880  // This counts the argument number in the overall function.
881  unsigned TypeIndex = Group.NumAggregateInputs;
882  bool TypeFound;
883  DenseSet<unsigned> AggArgsUsed;
884 
885  // Iterate over the output types and identify if there is an aggregate pointer
886  // type whose base type matches the current output type. If there is, we mark
887  // that we will use this output register for this value. If not we add another
888  // type to the overall argument type list. We also store the GVNs used for
889  // stores to identify which values will need to be moved into an special
890  // block that holds the stores to the output registers.
891  for (Value *Output : Outputs) {
892  TypeFound = false;
893  // We can do this since it is a result value, and will have a number
894  // that is necessarily the same. BUT if in the future, the instructions
895  // do not have to be in same order, but are functionally the same, we will
896  // have to use a different scheme, as one-to-one correspondence is not
897  // guaranteed.
898  unsigned GlobalValue = C.getGVN(Output).getValue();
899  unsigned ArgumentSize = Group.ArgumentTypes.size();
900 
901  for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
902  if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
903  continue;
904 
905  if (AggArgsUsed.contains(Jdx))
906  continue;
907 
908  TypeFound = true;
909  AggArgsUsed.insert(Jdx);
910  Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
911  Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
912  Region.GVNStores.push_back(GlobalValue);
913  break;
914  }
915 
916  // We were unable to find an unused type in the output type set that matches
917  // the output, so we add a pointer type to the argument types of the overall
918  // function to handle this output and create a mapping to it.
919  if (!TypeFound) {
920  Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
921  AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
922  Region.ExtractedArgToAgg.insert(
923  std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
924  Region.AggArgToExtracted.insert(
925  std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
926  Region.GVNStores.push_back(GlobalValue);
927  }
928 
929  stable_sort(Region.GVNStores);
930  OriginalIndex++;
931  TypeIndex++;
932  }
933 }
934 
935 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
936  DenseSet<unsigned> &NotSame) {
937  std::vector<unsigned> Inputs;
938  SetVector<Value *> ArgInputs, Outputs;
939 
940  getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
941  Outputs);
942 
943  if (Region.IgnoreRegion)
944  return;
945 
946  // Map the inputs found by the CodeExtractor to the arguments found for
947  // the overall function.
949 
950  // Map the outputs found by the CodeExtractor to the arguments found for
951  // the overall function.
953 }
954 
955 /// Replace the extracted function in the Region with a call to the overall
956 /// function constructed from the deduplicated similar regions, replacing and
957 /// remapping the values passed to the extracted function as arguments to the
958 /// new arguments of the overall function.
959 ///
960 /// \param [in] M - The module to outline from.
961 /// \param [in] Region - The regions of extracted code to be replaced with a new
962 /// function.
963 /// \returns a call instruction with the replaced function.
965  std::vector<Value *> NewCallArgs;
967 
968  OutlinableGroup &Group = *Region.Parent;
969  CallInst *Call = Region.Call;
970  assert(Call && "Call to replace is nullptr?");
971  Function *AggFunc = Group.OutlinedFunction;
972  assert(AggFunc && "Function to replace with is nullptr?");
973 
974  // If the arguments are the same size, there are not values that need to be
975  // made into an argument, the argument ordering has not been change, or
976  // different output registers to handle. We can simply replace the called
977  // function in this case.
978  if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
979  LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
980  << *AggFunc << " with same number of arguments\n");
981  Call->setCalledFunction(AggFunc);
982  return Call;
983  }
984 
985  // We have a different number of arguments than the new function, so
986  // we need to use our previously mappings off extracted argument to overall
987  // function argument, and constants to overall function argument to create the
988  // new argument list.
989  for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
990 
991  if (AggArgIdx == AggFunc->arg_size() - 1 &&
992  Group.OutputGVNCombinations.size() > 1) {
993  // If we are on the last argument, and we need to differentiate between
994  // output blocks, add an integer to the argument list to determine
995  // what block to take
996  LLVM_DEBUG(dbgs() << "Set switch block argument to "
997  << Region.OutputBlockNum << "\n");
998  NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
999  Region.OutputBlockNum));
1000  continue;
1001  }
1002 
1003  ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1004  if (ArgPair != Region.AggArgToExtracted.end()) {
1005  Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1006  // If we found the mapping from the extracted function to the overall
1007  // function, we simply add it to the argument list. We use the same
1008  // value, it just needs to honor the new order of arguments.
1009  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1010  << *ArgumentValue << "\n");
1011  NewCallArgs.push_back(ArgumentValue);
1012  continue;
1013  }
1014 
1015  // If it is a constant, we simply add it to the argument list as a value.
1016  if (Region.AggArgToConstant.find(AggArgIdx) !=
1017  Region.AggArgToConstant.end()) {
1018  Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1019  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1020  << *CST << "\n");
1021  NewCallArgs.push_back(CST);
1022  continue;
1023  }
1024 
1025  // Add a nullptr value if the argument is not found in the extracted
1026  // function. If we cannot find a value, it means it is not in use
1027  // for the region, so we should not pass anything to it.
1028  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1029  NewCallArgs.push_back(ConstantPointerNull::get(
1030  static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1031  }
1032 
1033  LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1034  << *AggFunc << " with new set of arguments\n");
1035  // Create the new call instruction and erase the old one.
1036  Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1037  Call);
1038 
1039  // It is possible that the call to the outlined function is either the first
1040  // instruction is in the new block, the last instruction, or both. If either
1041  // of these is the case, we need to make sure that we replace the instruction
1042  // in the IRInstructionData struct with the new call.
1043  CallInst *OldCall = Region.Call;
1044  if (Region.NewFront->Inst == OldCall)
1045  Region.NewFront->Inst = Call;
1046  if (Region.NewBack->Inst == OldCall)
1047  Region.NewBack->Inst = Call;
1048 
1049  // Transfer any debug information.
1050  Call->setDebugLoc(Region.Call->getDebugLoc());
1051  // Since our output may determine which branch we go to, we make sure to
1052  // propogate this new call value through the module.
1053  OldCall->replaceAllUsesWith(Call);
1054 
1055  // Remove the old instruction.
1056  OldCall->eraseFromParent();
1057  Region.Call = Call;
1058 
1059  // Make sure that the argument in the new function has the SwiftError
1060  // argument.
1061  if (Group.SwiftErrorArgument.hasValue())
1062  Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1063  Attribute::SwiftError);
1064 
1065  return Call;
1066 }
1067 
1068 // Within an extracted function, replace the argument uses of the extracted
1069 // region with the arguments of the function for an OutlinableGroup.
1070 //
1071 /// \param [in] Region - The region of extracted code to be changed.
1072 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1073 /// region.
1074 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1075 /// function to define the overall outlined function for all the regions, or
1076 /// if we are operating on one of the following regions.
1077 static void
1080  bool FirstFunction = false) {
1081  OutlinableGroup &Group = *Region.Parent;
1082  assert(Region.ExtractedFunction && "Region has no extracted function?");
1083 
1084  Function *DominatingFunction = Region.ExtractedFunction;
1085  if (FirstFunction)
1086  DominatingFunction = Group.OutlinedFunction;
1087  DominatorTree DT(*DominatingFunction);
1088 
1089  for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1090  ArgIdx++) {
1091  assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1092  Region.ExtractedArgToAgg.end() &&
1093  "No mapping from extracted to outlined?");
1094  unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1095  Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1096  Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1097  // The argument is an input, so we can simply replace it with the overall
1098  // argument value
1099  if (ArgIdx < Region.NumExtractedInputs) {
1100  LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1101  << *Region.ExtractedFunction << " with " << *AggArg
1102  << " in function " << *Group.OutlinedFunction << "\n");
1103  Arg->replaceAllUsesWith(AggArg);
1104  continue;
1105  }
1106 
1107  // If we are replacing an output, we place the store value in its own
1108  // block inside the overall function before replacing the use of the output
1109  // in the function.
1110  assert(Arg->hasOneUse() && "Output argument can only have one use");
1111  User *InstAsUser = Arg->user_back();
1112  assert(InstAsUser && "User is nullptr!");
1113 
1114  Instruction *I = cast<Instruction>(InstAsUser);
1115  BasicBlock *BB = I->getParent();
1116  SmallVector<BasicBlock *, 4> Descendants;
1117  DT.getDescendants(BB, Descendants);
1118  bool EdgeAdded = false;
1119  if (Descendants.size() == 0) {
1120  EdgeAdded = true;
1121  DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1122  DT.getDescendants(BB, Descendants);
1123  }
1124 
1125  // Iterate over the following blocks, looking for return instructions,
1126  // if we find one, find the corresponding output block for the return value
1127  // and move our store instruction there.
1128  for (BasicBlock *DescendBB : Descendants) {
1129  ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1130  if (!RI)
1131  continue;
1132  Value *RetVal = RI->getReturnValue();
1133  auto VBBIt = OutputBBs.find(RetVal);
1134  assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1135 
1136  // If this is storing a PHINode, we must make sure it is included in the
1137  // overall function.
1138  StoreInst *SI = cast<StoreInst>(I);
1139 
1140  Value *ValueOperand = SI->getValueOperand();
1141 
1142  StoreInst *NewI = cast<StoreInst>(I->clone());
1143  NewI->setDebugLoc(DebugLoc());
1144  BasicBlock *OutputBB = VBBIt->second;
1145  OutputBB->getInstList().push_back(NewI);
1146  LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1147  << *OutputBB << "\n");
1148 
1149  if (FirstFunction)
1150  continue;
1151  Value *CorrVal =
1152  Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1153  assert(CorrVal && "Value is nullptr?");
1154  NewI->setOperand(0, CorrVal);
1155  }
1156 
1157  // If we added an edge for basic blocks without a predecessor, we remove it
1158  // here.
1159  if (EdgeAdded)
1160  DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1161  I->eraseFromParent();
1162 
1163  LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1164  << *Region.ExtractedFunction << " with " << *AggArg
1165  << " in function " << *Group.OutlinedFunction << "\n");
1166  Arg->replaceAllUsesWith(AggArg);
1167  }
1168 }
1169 
1170 /// Within an extracted function, replace the constants that need to be lifted
1171 /// into arguments with the actual argument.
1172 ///
1173 /// \param Region [in] - The region of extracted code to be changed.
1175  OutlinableGroup &Group = *Region.Parent;
1176  // Iterate over the constants that need to be elevated into arguments
1177  for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1178  unsigned AggArgIdx = Const.first;
1179  Function *OutlinedFunction = Group.OutlinedFunction;
1180  assert(OutlinedFunction && "Overall Function is not defined?");
1181  Constant *CST = Const.second;
1182  Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1183  // Identify the argument it will be elevated to, and replace instances of
1184  // that constant in the function.
1185 
1186  // TODO: If in the future constants do not have one global value number,
1187  // i.e. a constant 1 could be mapped to several values, this check will
1188  // have to be more strict. It cannot be using only replaceUsesWithIf.
1189 
1190  LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1191  << " in function " << *OutlinedFunction << " with "
1192  << *Arg << "\n");
1193  CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1194  if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1195  return I->getFunction() == OutlinedFunction;
1196  return false;
1197  });
1198  }
1199 }
1200 
1201 /// It is possible that there is a basic block that already performs the same
1202 /// stores. This returns a duplicate block, if it exists
1203 ///
1204 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1205 /// \param OutputStoreBBs [in] The existing output blocks.
1206 /// \returns an optional value with the number output block if there is a match.
1209  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1210 
1211  bool Mismatch = false;
1212  unsigned MatchingNum = 0;
1213  // We compare the new set output blocks to the other sets of output blocks.
1214  // If they are the same number, and have identical instructions, they are
1215  // considered to be the same.
1216  for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1217  Mismatch = false;
1218  for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1220  OutputBBs.find(VToB.first);
1221  if (OutputBBIt == OutputBBs.end()) {
1222  Mismatch = true;
1223  break;
1224  }
1225 
1226  BasicBlock *CompBB = VToB.second;
1227  BasicBlock *OutputBB = OutputBBIt->second;
1228  if (CompBB->size() - 1 != OutputBB->size()) {
1229  Mismatch = true;
1230  break;
1231  }
1232 
1233  BasicBlock::iterator NIt = OutputBB->begin();
1234  for (Instruction &I : *CompBB) {
1235  if (isa<BranchInst>(&I))
1236  continue;
1237 
1238  if (!I.isIdenticalTo(&(*NIt))) {
1239  Mismatch = true;
1240  break;
1241  }
1242 
1243  NIt++;
1244  }
1245  }
1246 
1247  if (!Mismatch)
1248  return MatchingNum;
1249 
1250  MatchingNum++;
1251  }
1252 
1253  return None;
1254 }
1255 
1256 /// Remove empty output blocks from the outlined region.
1257 ///
1258 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1259 /// Region.
1260 /// \param Region - The OutlinableRegion we are analyzing.
1261 static bool
1264  bool AllRemoved = true;
1265  Value *RetValueForBB;
1266  BasicBlock *NewBB;
1268  // Iterate over the output blocks created in the outlined section.
1269  for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1270  RetValueForBB = VtoBB.first;
1271  NewBB = VtoBB.second;
1272 
1273  // If there are no instructions, we remove it from the module, and also
1274  // mark the value for removal from the return value to output block mapping.
1275  if (NewBB->size() == 0) {
1276  NewBB->eraseFromParent();
1277  ToRemove.push_back(RetValueForBB);
1278  continue;
1279  }
1280 
1281  // Mark that we could not remove all the blocks since they were not all
1282  // empty.
1283  AllRemoved = false;
1284  }
1285 
1286  // Remove the return value from the mapping.
1287  for (Value *V : ToRemove)
1288  BlocksToPrune.erase(V);
1289 
1290  // Mark the region as having the no output scheme.
1291  if (AllRemoved)
1292  Region.OutputBlockNum = -1;
1293 
1294  return AllRemoved;
1295 }
1296 
1297 /// For the outlined section, move needed the StoreInsts for the output
1298 /// registers into their own block. Then, determine if there is a duplicate
1299 /// output block already created.
1300 ///
1301 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1302 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1303 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
1304 /// placed in.
1305 /// \param [in] EndBBs - the final blocks of the extracted function.
1306 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
1307 /// been replaced by a new output value.
1308 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1313  const DenseMap<Value *, Value *> &OutputMappings,
1314  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1315  // If none of the output blocks have any instructions, this means that we do
1316  // not have to determine if it matches any of the other output schemes, and we
1317  // don't have to do anything else.
1318  if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
1319  return;
1320 
1321  // Determine is there is a duplicate set of blocks.
1322  Optional<unsigned> MatchingBB =
1323  findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
1324 
1325  // If there is, we remove the new output blocks. If it does not,
1326  // we add it to our list of sets of output blocks.
1327  if (MatchingBB.hasValue()) {
1328  LLVM_DEBUG(dbgs() << "Set output block for region in function"
1329  << Region.ExtractedFunction << " to "
1330  << MatchingBB.getValue());
1331 
1332  Region.OutputBlockNum = MatchingBB.getValue();
1333  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
1334  VtoBB.second->eraseFromParent();
1335  return;
1336  }
1337 
1338  Region.OutputBlockNum = OutputStoreBBs.size();
1339 
1340  Value *RetValueForBB;
1341  BasicBlock *NewBB;
1342  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1343  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
1344  RetValueForBB = VtoBB.first;
1345  NewBB = VtoBB.second;
1347  EndBBs.find(RetValueForBB);
1348  LLVM_DEBUG(dbgs() << "Create output block for region in"
1349  << Region.ExtractedFunction << " to "
1350  << *NewBB);
1351  BranchInst::Create(VBBIt->second, NewBB);
1352  OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
1353  }
1354 }
1355 
1356 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
1357 /// before creating a basic block for each \p NewMap, and inserting into the new
1358 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
1359 ///
1360 /// \param OldMap [in] - The mapping to base the new mapping off of.
1361 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
1362 /// \param ParentFunc [in] - The function to put the new basic block in.
1363 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
1364 /// by an index value.
1367  Function *ParentFunc, Twine BaseName) {
1368  unsigned Idx = 0;
1369  std::vector<Value *> SortedKeys;
1370 
1371  getSortedConstantKeys(SortedKeys, OldMap);
1372 
1373  for (Value *RetVal : SortedKeys) {
1374  BasicBlock *NewBB = BasicBlock::Create(
1375  ParentFunc->getContext(),
1376  Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
1377  ParentFunc);
1378  NewMap.insert(std::make_pair(RetVal, NewBB));
1379  }
1380 }
1381 
1382 /// Create the switch statement for outlined function to differentiate between
1383 /// all the output blocks.
1384 ///
1385 /// For the outlined section, determine if an outlined block already exists that
1386 /// matches the needed stores for the extracted section.
1387 /// \param [in] M - The module we are outlining from.
1388 /// \param [in] OG - The group of regions to be outlined.
1389 /// \param [in] EndBBs - The final blocks of the extracted function.
1390 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1393  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1394  // We only need the switch statement if there is more than one store
1395  // combination.
1396  if (OG.OutputGVNCombinations.size() > 1) {
1397  Function *AggFunc = OG.OutlinedFunction;
1398  // Create a final block for each different return block.
1400  createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
1401 
1402  for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
1403  std::pair<Value *, BasicBlock *> &OutputBlock =
1404  *OG.EndBBs.find(RetBlockPair.first);
1405  BasicBlock *ReturnBlock = RetBlockPair.second;
1406  BasicBlock *EndBB = OutputBlock.second;
1407  Instruction *Term = EndBB->getTerminator();
1408  // Move the return value to the final block instead of the original exit
1409  // stub.
1410  Term->moveBefore(*ReturnBlock, ReturnBlock->end());
1411  // Put the switch statement in the old end basic block for the function
1412  // with a fall through to the new return block.
1413  LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
1414  << OutputStoreBBs.size() << "\n");
1415  SwitchInst *SwitchI =
1416  SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
1417  ReturnBlock, OutputStoreBBs.size(), EndBB);
1418 
1419  unsigned Idx = 0;
1420  for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
1422  OutputStoreBB.find(OutputBlock.first);
1423 
1424  if (OSBBIt == OutputStoreBB.end())
1425  continue;
1426 
1427  BasicBlock *BB = OSBBIt->second;
1428  SwitchI->addCase(
1429  ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
1430  Term = BB->getTerminator();
1431  Term->setSuccessor(0, ReturnBlock);
1432  Idx++;
1433  }
1434  }
1435  return;
1436  }
1437 
1438  // If there needs to be stores, move them from the output blocks to their
1439  // corresponding ending block.
1440  if (OutputStoreBBs.size() == 1) {
1441  LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
1442  << *OG.OutlinedFunction << "\n");
1443  DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
1444  for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
1446  EndBBs.find(VBPair.first);
1447  assert(EndBBIt != EndBBs.end() && "Could not find end block");
1448  BasicBlock *EndBB = EndBBIt->second;
1449  BasicBlock *OutputBB = VBPair.second;
1450  Instruction *Term = OutputBB->getTerminator();
1451  Term->eraseFromParent();
1452  Term = EndBB->getTerminator();
1453  moveBBContents(*OutputBB, *EndBB);
1454  Term->moveBefore(*EndBB, EndBB->end());
1455  OutputBB->eraseFromParent();
1456  }
1457  }
1458 }
1459 
1460 /// Fill the new function that will serve as the replacement function for all of
1461 /// the extracted regions of a certain structure from the first region in the
1462 /// list of regions. Replace this first region's extracted function with the
1463 /// new overall function.
1464 ///
1465 /// \param [in] M - The module we are outlining from.
1466 /// \param [in] CurrentGroup - The group of regions to be outlined.
1467 /// \param [in,out] OutputStoreBBs - The output blocks for each different
1468 /// set of stores needed for the different functions.
1469 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
1470 /// once outlining is complete.
1472  Module &M, OutlinableGroup &CurrentGroup,
1473  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
1474  std::vector<Function *> &FuncsToRemove) {
1475  OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
1476 
1477  // Move first extracted function's instructions into new function.
1478  LLVM_DEBUG(dbgs() << "Move instructions from "
1479  << *CurrentOS->ExtractedFunction << " to instruction "
1480  << *CurrentGroup.OutlinedFunction << "\n");
1481  moveFunctionData(*CurrentOS->ExtractedFunction,
1482  *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
1483 
1484  // Transfer the attributes from the function to the new function.
1485  for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
1486  CurrentGroup.OutlinedFunction->addFnAttr(A);
1487 
1488  // Create a new set of output blocks for the first extracted function.
1490  createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
1491  CurrentGroup.OutlinedFunction, "output_block_0");
1492  CurrentOS->OutputBlockNum = 0;
1493 
1494  replaceArgumentUses(*CurrentOS, NewBBs, true);
1495  replaceConstants(*CurrentOS);
1496 
1497  // We first identify if any output blocks are empty, if they are we remove
1498  // them. We then create a branch instruction to the basic block to the return
1499  // block for the function for each non empty output block.
1500  if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
1501  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1502  for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
1504  CurrentGroup.EndBBs.find(VToBB.first);
1505  BasicBlock *EndBB = VBBIt->second;
1506  BranchInst::Create(EndBB, VToBB.second);
1507  OutputStoreBBs.back().insert(VToBB);
1508  }
1509  }
1510 
1511  // Replace the call to the extracted function with the outlined function.
1512  CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1513 
1514  // We only delete the extracted functions at the end since we may need to
1515  // reference instructions contained in them for mapping purposes.
1516  FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1517 }
1518 
1519 void IROutliner::deduplicateExtractedSections(
1520  Module &M, OutlinableGroup &CurrentGroup,
1521  std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
1522  createFunction(M, CurrentGroup, OutlinedFunctionNum);
1523 
1524  std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
1525 
1526  OutlinableRegion *CurrentOS;
1527 
1528  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
1529 
1530  std::vector<Value *> SortedKeys;
1531  for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
1532  CurrentOS = CurrentGroup.Regions[Idx];
1534  *CurrentOS->ExtractedFunction);
1535 
1536  // Create a set of BasicBlocks, one for each return block, to hold the
1537  // needed store instructions.
1540  CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
1541  "output_block_" + Twine(static_cast<unsigned>(Idx)));
1542 
1543  replaceArgumentUses(*CurrentOS, NewBBs);
1544  alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
1545  CurrentGroup.EndBBs, OutputMappings,
1546  OutputStoreBBs);
1547 
1548  CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1549  FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1550  }
1551 
1552  // Create a switch statement to handle the different output schemes.
1553  createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
1554 
1555  OutlinedFunctionNum++;
1556 }
1557 
1558 /// Checks that the next instruction in the InstructionDataList matches the
1559 /// next instruction in the module. If they do not, there could be the
1560 /// possibility that extra code has been inserted, and we must ignore it.
1561 ///
1562 /// \param ID - The IRInstructionData to check the next instruction of.
1563 /// \returns true if the InstructionDataList and actual instruction match.
1565  // We check if there is a discrepancy between the InstructionDataList
1566  // and the actual next instruction in the module. If there is, it means
1567  // that an extra instruction was added, likely by the CodeExtractor.
1568 
1569  // Since we do not have any similarity data about this particular
1570  // instruction, we cannot confidently outline it, and must discard this
1571  // candidate.
1572  IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
1573  Instruction *NextIDLInst = NextIDIt->Inst;
1574  Instruction *NextModuleInst = nullptr;
1575  if (!ID.Inst->isTerminator())
1576  NextModuleInst = ID.Inst->getNextNonDebugInstruction();
1577  else if (NextIDLInst != nullptr)
1578  NextModuleInst =
1579  &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
1580 
1581  if (NextIDLInst && NextIDLInst != NextModuleInst)
1582  return false;
1583 
1584  return true;
1585 }
1586 
1587 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
1588  const OutlinableRegion &Region) {
1589  IRSimilarityCandidate *IRSC = Region.Candidate;
1590  unsigned StartIdx = IRSC->getStartIdx();
1591  unsigned EndIdx = IRSC->getEndIdx();
1592 
1593  // A check to make sure that we are not about to attempt to outline something
1594  // that has already been outlined.
1595  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1596  if (Outlined.contains(Idx))
1597  return false;
1598 
1599  // We check if the recorded instruction matches the actual next instruction,
1600  // if it does not, we fix it in the InstructionDataList.
1601  if (!Region.Candidate->backInstruction()->isTerminator()) {
1602  Instruction *NewEndInst =
1603  Region.Candidate->backInstruction()->getNextNonDebugInstruction();
1604  assert(NewEndInst && "Next instruction is a nullptr?");
1605  if (Region.Candidate->end()->Inst != NewEndInst) {
1606  IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1607  IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
1608  IRInstructionData(*NewEndInst,
1609  InstructionClassifier.visit(*NewEndInst), *IDL);
1610 
1611  // Insert the first IRInstructionData of the new region after the
1612  // last IRInstructionData of the IRSimilarityCandidate.
1613  IDL->insert(Region.Candidate->end(), *NewEndIRID);
1614  }
1615  }
1616 
1617  return none_of(*IRSC, [this](IRInstructionData &ID) {
1619  return true;
1620 
1621  return !this->InstructionClassifier.visit(ID.Inst);
1622  });
1623 }
1624 
1625 void IROutliner::pruneIncompatibleRegions(
1626  std::vector<IRSimilarityCandidate> &CandidateVec,
1627  OutlinableGroup &CurrentGroup) {
1628  bool PreviouslyOutlined;
1629 
1630  // Sort from beginning to end, so the IRSimilarityCandidates are in order.
1631  stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
1632  const IRSimilarityCandidate &RHS) {
1633  return LHS.getStartIdx() < RHS.getStartIdx();
1634  });
1635 
1636  IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
1637  // Since outlining a call and a branch instruction will be the same as only
1638  // outlinining a call instruction, we ignore it as a space saving.
1639  if (FirstCandidate.getLength() == 2) {
1640  if (isa<CallInst>(FirstCandidate.front()->Inst) &&
1641  isa<BranchInst>(FirstCandidate.back()->Inst))
1642  return;
1643  }
1644 
1645  unsigned CurrentEndIdx = 0;
1646  for (IRSimilarityCandidate &IRSC : CandidateVec) {
1647  PreviouslyOutlined = false;
1648  unsigned StartIdx = IRSC.getStartIdx();
1649  unsigned EndIdx = IRSC.getEndIdx();
1650 
1651  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1652  if (Outlined.contains(Idx)) {
1653  PreviouslyOutlined = true;
1654  break;
1655  }
1656 
1657  if (PreviouslyOutlined)
1658  continue;
1659 
1660  // Check over the instructions, and if the basic block has its address
1661  // taken for use somewhere else, we do not outline that block.
1662  bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
1663  return ID.Inst->getParent()->hasAddressTaken();
1664  });
1665 
1666  if (BBHasAddressTaken)
1667  continue;
1668 
1669  if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
1670  !OutlineFromLinkODRs)
1671  continue;
1672 
1673  // Greedily prune out any regions that will overlap with already chosen
1674  // regions.
1675  if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
1676  continue;
1677 
1678  bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
1680  return true;
1681 
1682  return !this->InstructionClassifier.visit(ID.Inst);
1683  });
1684 
1685  if (BadInst)
1686  continue;
1687 
1688  OutlinableRegion *OS = new (RegionAllocator.Allocate())
1689  OutlinableRegion(IRSC, CurrentGroup);
1690  CurrentGroup.Regions.push_back(OS);
1691 
1692  CurrentEndIdx = EndIdx;
1693  }
1694 }
1695 
1697 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
1698  InstructionCost RegionBenefit = 0;
1699  for (OutlinableRegion *Region : CurrentGroup.Regions) {
1700  TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1701  // We add the number of instructions in the region to the benefit as an
1702  // estimate as to how much will be removed.
1703  RegionBenefit += Region->getBenefit(TTI);
1704  LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
1705  << " saved instructions to overfall benefit.\n");
1706  }
1707 
1708  return RegionBenefit;
1709 }
1710 
1712 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
1713  InstructionCost OverallCost = 0;
1714  for (OutlinableRegion *Region : CurrentGroup.Regions) {
1715  TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1716 
1717  // Each output incurs a load after the call, so we add that to the cost.
1718  for (unsigned OutputGVN : Region->GVNStores) {
1719  Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
1720  assert(OV.hasValue() && "Could not find value for GVN?");
1721  Value *V = OV.getValue();
1722  InstructionCost LoadCost =
1725 
1726  LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
1727  << " instructions to cost for output of type "
1728  << *V->getType() << "\n");
1729  OverallCost += LoadCost;
1730  }
1731  }
1732 
1733  return OverallCost;
1734 }
1735 
1736 /// Find the extra instructions needed to handle any output values for the
1737 /// region.
1738 ///
1739 /// \param [in] M - The Module to outline from.
1740 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
1741 /// \param [in] TTI - The TargetTransformInfo used to collect information for
1742 /// new instruction costs.
1743 /// \returns the additional cost to handle the outputs.
1745  OutlinableGroup &CurrentGroup,
1747  InstructionCost OutputCost = 0;
1748  unsigned NumOutputBranches = 0;
1749 
1750  IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
1751  DenseSet<BasicBlock *> CandidateBlocks;
1752  Candidate.getBasicBlocks(CandidateBlocks);
1753 
1754  // Count the number of different output branches that point to blocks outside
1755  // of the region.
1756  DenseSet<BasicBlock *> FoundBlocks;
1757  for (IRInstructionData &ID : Candidate) {
1758  if (!isa<BranchInst>(ID.Inst))
1759  continue;
1760 
1761  for (Value *V : ID.OperVals) {
1762  BasicBlock *BB = static_cast<BasicBlock *>(V);
1763  DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
1764  if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
1765  continue;
1766  FoundBlocks.insert(BB);
1767  NumOutputBranches++;
1768  }
1769  }
1770 
1771  CurrentGroup.BranchesToOutside = NumOutputBranches;
1772 
1773  for (const ArrayRef<unsigned> &OutputUse :
1774  CurrentGroup.OutputGVNCombinations) {
1775  for (unsigned GVN : OutputUse) {
1776  Optional<Value *> OV = Candidate.fromGVN(GVN);
1777  assert(OV.hasValue() && "Could not find value for GVN?");
1778  Value *V = OV.getValue();
1779  InstructionCost StoreCost =
1782 
1783  // An instruction cost is added for each store set that needs to occur for
1784  // various output combinations inside the function, plus a branch to
1785  // return to the exit block.
1786  LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
1787  << " instructions to cost for output of type "
1788  << *V->getType() << "\n");
1789  OutputCost += StoreCost * NumOutputBranches;
1790  }
1791 
1792  InstructionCost BranchCost =
1794  LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
1795  << " a branch instruction\n");
1796  OutputCost += BranchCost * NumOutputBranches;
1797  }
1798 
1799  // If there is more than one output scheme, we must have a comparison and
1800  // branch for each different item in the switch statement.
1801  if (CurrentGroup.OutputGVNCombinations.size() > 1) {
1802  InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
1803  Instruction::ICmp, Type::getInt32Ty(M.getContext()),
1806  InstructionCost BranchCost =
1808 
1809  unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
1810  InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
1811 
1812  LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
1813  << " instructions for each switch case for each different"
1814  << " output path in a function\n");
1815  OutputCost += TotalCost * NumOutputBranches;
1816  }
1817 
1818  return OutputCost;
1819 }
1820 
1821 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
1822  InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
1823  CurrentGroup.Benefit += RegionBenefit;
1824  LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
1825 
1826  InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
1827  CurrentGroup.Cost += OutputReloadCost;
1828  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1829 
1830  InstructionCost AverageRegionBenefit =
1831  RegionBenefit / CurrentGroup.Regions.size();
1832  unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
1833  unsigned NumRegions = CurrentGroup.Regions.size();
1835  getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
1836 
1837  // We add one region to the cost once, to account for the instructions added
1838  // inside of the newly created function.
1839  LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
1840  << " instructions to cost for body of new function.\n");
1841  CurrentGroup.Cost += AverageRegionBenefit;
1842  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1843 
1844  // For each argument, we must add an instruction for loading the argument
1845  // out of the register and into a value inside of the newly outlined function.
1846  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1847  << " instructions to cost for each argument in the new"
1848  << " function.\n");
1849  CurrentGroup.Cost +=
1850  OverallArgumentNum * TargetTransformInfo::TCC_Basic;
1851  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1852 
1853  // Each argument needs to either be loaded into a register or onto the stack.
1854  // Some arguments will only be loaded into the stack once the argument
1855  // registers are filled.
1856  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1857  << " instructions to cost for each argument in the new"
1858  << " function " << NumRegions << " times for the "
1859  << "needed argument handling at the call site.\n");
1860  CurrentGroup.Cost +=
1861  2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
1862  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1863 
1864  CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
1865  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1866 }
1867 
1868 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
1869  ArrayRef<Value *> Outputs,
1870  LoadInst *LI) {
1871  // For and load instructions following the call
1872  Value *Operand = LI->getPointerOperand();
1873  Optional<unsigned> OutputIdx = None;
1874  // Find if the operand it is an output register.
1875  for (unsigned ArgIdx = Region.NumExtractedInputs;
1876  ArgIdx < Region.Call->arg_size(); ArgIdx++) {
1877  if (Operand == Region.Call->getArgOperand(ArgIdx)) {
1878  OutputIdx = ArgIdx - Region.NumExtractedInputs;
1879  break;
1880  }
1881  }
1882 
1883  // If we found an output register, place a mapping of the new value
1884  // to the original in the mapping.
1885  if (!OutputIdx.hasValue())
1886  return;
1887 
1888  if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
1889  OutputMappings.end()) {
1890  LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
1891  << *Outputs[OutputIdx.getValue()] << "\n");
1892  OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
1893  } else {
1894  Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
1895  LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
1896  << *Outputs[OutputIdx.getValue()] << "\n");
1897  OutputMappings.insert(std::make_pair(LI, Orig));
1898  }
1899 }
1900 
1901 bool IROutliner::extractSection(OutlinableRegion &Region) {
1902  SetVector<Value *> ArgInputs, Outputs, SinkCands;
1903  assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
1904  BasicBlock *InitialStart = Region.StartBB;
1905  Function *OrigF = Region.StartBB->getParent();
1906  CodeExtractorAnalysisCache CEAC(*OrigF);
1907  Region.ExtractedFunction =
1908  Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
1909 
1910  // If the extraction was successful, find the BasicBlock, and reassign the
1911  // OutlinableRegion blocks
1912  if (!Region.ExtractedFunction) {
1913  LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
1914  << "\n");
1915  Region.reattachCandidate();
1916  return false;
1917  }
1918 
1919  // Get the block containing the called branch, and reassign the blocks as
1920  // necessary. If the original block still exists, it is because we ended on
1921  // a branch instruction, and so we move the contents into the block before
1922  // and assign the previous block correctly.
1923  User *InstAsUser = Region.ExtractedFunction->user_back();
1924  BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
1925  Region.PrevBB = RewrittenBB->getSinglePredecessor();
1926  assert(Region.PrevBB && "PrevBB is nullptr?");
1927  if (Region.PrevBB == InitialStart) {
1928  BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
1929  Instruction *BI = NewPrev->getTerminator();
1930  BI->eraseFromParent();
1931  moveBBContents(*InitialStart, *NewPrev);
1932  Region.PrevBB = NewPrev;
1933  InitialStart->eraseFromParent();
1934  }
1935 
1936  Region.StartBB = RewrittenBB;
1937  Region.EndBB = RewrittenBB;
1938 
1939  // The sequences of outlinable regions has now changed. We must fix the
1940  // IRInstructionDataList for consistency. Although they may not be illegal
1941  // instructions, they should not be compared with anything else as they
1942  // should not be outlined in this round. So marking these as illegal is
1943  // allowed.
1944  IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1945  Instruction *BeginRewritten = &*RewrittenBB->begin();
1946  Instruction *EndRewritten = &*RewrittenBB->begin();
1947  Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
1948  *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
1949  Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
1950  *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
1951 
1952  // Insert the first IRInstructionData of the new region in front of the
1953  // first IRInstructionData of the IRSimilarityCandidate.
1954  IDL->insert(Region.Candidate->begin(), *Region.NewFront);
1955  // Insert the first IRInstructionData of the new region after the
1956  // last IRInstructionData of the IRSimilarityCandidate.
1957  IDL->insert(Region.Candidate->end(), *Region.NewBack);
1958  // Remove the IRInstructionData from the IRSimilarityCandidate.
1959  IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
1960 
1961  assert(RewrittenBB != nullptr &&
1962  "Could not find a predecessor after extraction!");
1963 
1964  // Iterate over the new set of instructions to find the new call
1965  // instruction.
1966  for (Instruction &I : *RewrittenBB)
1967  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1968  if (Region.ExtractedFunction == CI->getCalledFunction())
1969  Region.Call = CI;
1970  } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
1971  updateOutputMapping(Region, Outputs.getArrayRef(), LI);
1972  Region.reattachCandidate();
1973  return true;
1974 }
1975 
1976 unsigned IROutliner::doOutline(Module &M) {
1977  // Find the possible similarity sections.
1978  InstructionClassifier.EnableBranches = !DisableBranches;
1979  IRSimilarityIdentifier &Identifier = getIRSI(M);
1980  SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
1981 
1982  // Sort them by size of extracted sections
1983  unsigned OutlinedFunctionNum = 0;
1984  // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
1985  // to sort them by the potential number of instructions to be outlined
1986  if (SimilarityCandidates.size() > 1)
1987  llvm::stable_sort(SimilarityCandidates,
1988  [](const std::vector<IRSimilarityCandidate> &LHS,
1989  const std::vector<IRSimilarityCandidate> &RHS) {
1990  return LHS[0].getLength() * LHS.size() >
1991  RHS[0].getLength() * RHS.size();
1992  });
1993  // Creating OutlinableGroups for each SimilarityCandidate to be used in
1994  // each of the following for loops to avoid making an allocator.
1995  std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
1996 
1997  DenseSet<unsigned> NotSame;
1998  std::vector<OutlinableGroup *> NegativeCostGroups;
1999  std::vector<OutlinableRegion *> OutlinedRegions;
2000  // Iterate over the possible sets of similarity.
2001  unsigned PotentialGroupIdx = 0;
2002  for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2003  OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2004 
2005  // Remove entries that were previously outlined
2006  pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2007 
2008  // We pruned the number of regions to 0 to 1, meaning that it's not worth
2009  // trying to outlined since there is no compatible similar instance of this
2010  // code.
2011  if (CurrentGroup.Regions.size() < 2)
2012  continue;
2013 
2014  // Determine if there are any values that are the same constant throughout
2015  // each section in the set.
2016  NotSame.clear();
2017  CurrentGroup.findSameConstants(NotSame);
2018 
2019  if (CurrentGroup.IgnoreGroup)
2020  continue;
2021 
2022  // Create a CodeExtractor for each outlinable region. Identify inputs and
2023  // outputs for each section using the code extractor and create the argument
2024  // types for the Aggregate Outlining Function.
2025  OutlinedRegions.clear();
2026  for (OutlinableRegion *OS : CurrentGroup.Regions) {
2027  // Break the outlinable region out of its parent BasicBlock into its own
2028  // BasicBlocks (see function implementation).
2029  OS->splitCandidate();
2030 
2031  // There's a chance that when the region is split, extra instructions are
2032  // added to the region. This makes the region no longer viable
2033  // to be split, so we ignore it for outlining.
2034  if (!OS->CandidateSplit)
2035  continue;
2036 
2038  DenseSet<BasicBlock *> BBSet;
2039  OS->Candidate->getBasicBlocks(BBSet, BE);
2040  OS->CE = new (ExtractorAllocator.Allocate())
2041  CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2042  false, "outlined");
2043  findAddInputsOutputs(M, *OS, NotSame);
2044  if (!OS->IgnoreRegion)
2045  OutlinedRegions.push_back(OS);
2046 
2047  // We recombine the blocks together now that we have gathered all the
2048  // needed information.
2049  OS->reattachCandidate();
2050  }
2051 
2052  CurrentGroup.Regions = std::move(OutlinedRegions);
2053 
2054  if (CurrentGroup.Regions.empty())
2055  continue;
2056 
2057  CurrentGroup.collectGVNStoreSets(M);
2058 
2059  if (CostModel)
2060  findCostBenefit(M, CurrentGroup);
2061 
2062  // If we are adhering to the cost model, skip those groups where the cost
2063  // outweighs the benefits.
2064  if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2066  getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2067  ORE.emit([&]() {
2068  IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2069  OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2070  C->frontInstruction());
2071  R << "did not outline "
2072  << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2073  << " regions due to estimated increase of "
2074  << ore::NV("InstructionIncrease",
2075  CurrentGroup.Cost - CurrentGroup.Benefit)
2076  << " instructions at locations ";
2077  interleave(
2078  CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2079  [&R](OutlinableRegion *Region) {
2080  R << ore::NV(
2081  "DebugLoc",
2082  Region->Candidate->frontInstruction()->getDebugLoc());
2083  },
2084  [&R]() { R << " "; });
2085  return R;
2086  });
2087  continue;
2088  }
2089 
2090  NegativeCostGroups.push_back(&CurrentGroup);
2091  }
2092 
2093  ExtractorAllocator.DestroyAll();
2094 
2095  if (NegativeCostGroups.size() > 1)
2096  stable_sort(NegativeCostGroups,
2097  [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2098  return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2099  });
2100 
2101  std::vector<Function *> FuncsToRemove;
2102  for (OutlinableGroup *CG : NegativeCostGroups) {
2103  OutlinableGroup &CurrentGroup = *CG;
2104 
2105  OutlinedRegions.clear();
2106  for (OutlinableRegion *Region : CurrentGroup.Regions) {
2107  // We check whether our region is compatible with what has already been
2108  // outlined, and whether we need to ignore this item.
2109  if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2110  continue;
2111  OutlinedRegions.push_back(Region);
2112  }
2113 
2114  if (OutlinedRegions.size() < 2)
2115  continue;
2116 
2117  // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2118  // we are still outlining enough regions to make up for the added cost.
2119  CurrentGroup.Regions = std::move(OutlinedRegions);
2120  if (CostModel) {
2121  CurrentGroup.Benefit = 0;
2122  CurrentGroup.Cost = 0;
2123  findCostBenefit(M, CurrentGroup);
2124  if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2125  continue;
2126  }
2127  OutlinedRegions.clear();
2128  for (OutlinableRegion *Region : CurrentGroup.Regions) {
2129  Region->splitCandidate();
2130  if (!Region->CandidateSplit)
2131  continue;
2132  OutlinedRegions.push_back(Region);
2133  }
2134 
2135  CurrentGroup.Regions = std::move(OutlinedRegions);
2136  if (CurrentGroup.Regions.size() < 2) {
2137  for (OutlinableRegion *R : CurrentGroup.Regions)
2138  R->reattachCandidate();
2139  continue;
2140  }
2141 
2142  LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2143  << " and benefit " << CurrentGroup.Benefit << "\n");
2144 
2145  // Create functions out of all the sections, and mark them as outlined.
2146  OutlinedRegions.clear();
2147  for (OutlinableRegion *OS : CurrentGroup.Regions) {
2149  DenseSet<BasicBlock *> BBSet;
2150  OS->Candidate->getBasicBlocks(BBSet, BE);
2151  OS->CE = new (ExtractorAllocator.Allocate())
2152  CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2153  false, "outlined");
2154  bool FunctionOutlined = extractSection(*OS);
2155  if (FunctionOutlined) {
2156  unsigned StartIdx = OS->Candidate->getStartIdx();
2157  unsigned EndIdx = OS->Candidate->getEndIdx();
2158  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2159  Outlined.insert(Idx);
2160 
2161  OutlinedRegions.push_back(OS);
2162  }
2163  }
2164 
2165  LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2166  << " with benefit " << CurrentGroup.Benefit
2167  << " and cost " << CurrentGroup.Cost << "\n");
2168 
2169  CurrentGroup.Regions = std::move(OutlinedRegions);
2170 
2171  if (CurrentGroup.Regions.empty())
2172  continue;
2173 
2175  getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2176  ORE.emit([&]() {
2177  IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2178  OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2179  R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2180  << " regions with decrease of "
2181  << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2182  << " instructions at locations ";
2183  interleave(
2184  CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2185  [&R](OutlinableRegion *Region) {
2186  R << ore::NV("DebugLoc",
2187  Region->Candidate->frontInstruction()->getDebugLoc());
2188  },
2189  [&R]() { R << " "; });
2190  return R;
2191  });
2192 
2193  deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2194  OutlinedFunctionNum);
2195  }
2196 
2197  for (Function *F : FuncsToRemove)
2198  F->eraseFromParent();
2199 
2200  return OutlinedFunctionNum;
2201 }
2202 
2204  CostModel = !NoCostModel;
2205  OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2206 
2207  return doOutline(M) > 0;
2208 }
2209 
2210 // Pass Manager Boilerplate
2211 namespace {
2212 class IROutlinerLegacyPass : public ModulePass {
2213 public:
2214  static char ID;
2215  IROutlinerLegacyPass() : ModulePass(ID) {
2217  }
2218 
2219  void getAnalysisUsage(AnalysisUsage &AU) const override {
2223  }
2224 
2225  bool runOnModule(Module &M) override;
2226 };
2227 } // namespace
2228 
2229 bool IROutlinerLegacyPass::runOnModule(Module &M) {
2230  if (skipModule(M))
2231  return false;
2232 
2233  std::unique_ptr<OptimizationRemarkEmitter> ORE;
2234  auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2235  ORE.reset(new OptimizationRemarkEmitter(&F));
2236  return *ORE.get();
2237  };
2238 
2239  auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2240  return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2241  };
2242 
2243  auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2244  return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2245  };
2246 
2247  return IROutliner(GTTI, GIRSI, GORE).run(M);
2248 }
2249 
2251  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2252 
2254  [&FAM](Function &F) -> TargetTransformInfo & {
2255  return FAM.getResult<TargetIRAnalysis>(F);
2256  };
2257 
2259  [&AM](Module &M) -> IRSimilarityIdentifier & {
2260  return AM.getResult<IRSimilarityAnalysis>(M);
2261  };
2262 
2263  std::unique_ptr<OptimizationRemarkEmitter> ORE;
2265  [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2266  ORE.reset(new OptimizationRemarkEmitter(&F));
2267  return *ORE.get();
2268  };
2269 
2270  if (IROutliner(GTTI, GIRSI, GORE).run(M))
2271  return PreservedAnalyses::none();
2272  return PreservedAnalyses::all();
2273 }
2274 
2275 char IROutlinerLegacyPass::ID = 0;
2276 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2277  false)
2281 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2282  false)
2283 
2284 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::Instruction::isTerminator
bool isTerminator() const
Definition: Instruction.h:163
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
alignOutputBlockWithAggFunc
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
Definition: IROutliner.cpp:1309
IROutliner.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2418
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
getSortedConstantKeys
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:149
llvm::AttributeFuncs::mergeAttributesForOutlining
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
Definition: Attributes.cpp:2049
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
moveBBContents
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:139
OutlinableGroup::ArgumentTypes
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:71
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1663
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3010
llvm::IRSimilarity::IRInstructionDataList
Definition: IRSimilarityIdentifier.h:241
llvm::RegionBase::end
iterator end()
Definition: RegionInfo.h:561
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::DIBuilder
Definition: DIBuilder.h:41
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
DebugInfoMetadata.h
OutlinableGroup::collectGVNStoreSets
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:424
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
llvm::Instruction::getNextNonDebugInstruction
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
Definition: Instruction.cpp:738
OutlinableGroup::EndBBs
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:82
llvm::Function
Definition: Function.h:62
llvm::Attribute
Definition: Attributes.h:53
createAndInsertBasicBlocks
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
Definition: IROutliner.cpp:1365
OutlinableGroup::OutputGVNCombinations
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:91
Pass.h
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:855
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:631
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:104
llvm::IRSimilarityAnalysis
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Definition: IRSimilarityIdentifier.h:996
llvm::ReturnInst::getReturnValue
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Definition: Instructions.h:3055
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3425
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::find
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:707
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:540
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:461
llvm::IRSimilarity::IRSimilarityCandidate::getBasicBlocks
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
Definition: IRSimilarityIdentifier.h:732
OutlinableGroup::findSameConstants
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:417
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
EnableLinkOnceODRIROutlining
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
findConstants
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the the constants that will need to be lifted into arguments as they are not the same in each in...
Definition: IROutliner.cpp:585
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:244
OutlinableGroup::OutlinedFunctionType
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:73
fillOverallFunction
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs, std::vector< Function * > &FuncsToRemove)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
Definition: IROutliner.cpp:1471
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:771
llvm::SetVector::getArrayRef
ArrayRef< T > getArrayRef() const
Definition: SetVector.h:63
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:133
llvm::DisableBranches
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:40
llvm::AttributeList::getFnAttrs
AttributeSet getFnAttrs() const
The function attributes are returned.
Definition: Attributes.cpp:1384
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:385
llvm::IRSimilarity::SimilarityGroup
std::vector< IRSimilarityCandidate > SimilarityGroup
Definition: IRSimilarityIdentifier.h:857
llvm::Optional< unsigned >
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:818
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:272
llvm::IRSimilarity::SimilarityGroupList
std::vector< SimilarityGroup > SimilarityGroupList
Definition: IRSimilarityIdentifier.h:858
llvm::Mangler
Definition: Mangler.h:27
OutlinableGroup::OutlinedFunction
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:75
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
llvm::IROutlinerPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: IROutliner.cpp:2250
llvm::DominatorTreeBase::getDescendants
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
Definition: GenericDomTree.h:374
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Value::isSwiftError
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1012
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
CommandLine.h
llvm::interleave
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:1892
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
OutlinableGroup::Regions
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:67
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
findExtractedOutputToOverallOutputMapping
static void findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
Definition: IROutliner.cpp:841
analyzeAndPruneOutputBlocks
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
Definition: IROutliner.cpp:1262
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
DEBUG_TYPE
#define DEBUG_TYPE
Definition: IROutliner.cpp:32
llvm::IRSimilarityIdentifierWrapperPass
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
Definition: IRSimilarityIdentifier.h:976
replaceArgumentUses
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, bool FirstFunction=false)
Definition: IROutliner.cpp:1078
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
findDuplicateOutputBlock
Optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
Definition: IROutliner.cpp:1207
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
OutlinableGroup::NumAggregateInputs
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:99
findExtractedInputToOverallInputMapping
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:742
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1521
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
OutlinableGroup::InputTypesSet
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:95
llvm::OutlinableRegion::CandidateSplit
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:120
false
Definition: StackSlotColoring.cpp:142
llvm::Mangler::getNameWithPrefix
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:119
llvm::detail::DenseSetImpl::size
size_type size() const
Definition: DenseSet.h:81
llvm::Instruction
Definition: Instruction.h:45
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
mapInputsToGVNs
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:615
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:746
llvm::OutlinableRegion::reattachCandidate
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:238
llvm::OutlinableRegion::OutputBlockNum
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:79
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::OutlinableRegion::findCorrespondingValueIn
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:164
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:244
llvm::None
const NoneType None
Definition: None.h:23
remapExtractedInputs
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:641
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:316
llvm::IRSimilarity::IRSimilarityCandidate::getEndIdx
unsigned getEndIdx() const
Definition: IRSimilarityIdentifier.h:772
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
OutlinableGroup::CanonicalNumberToAggArg
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:103
llvm::OutlinableRegion::getBenefit
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:324
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:297
llvm::cl::opt< bool >
llvm::OutlinableRegion::ExtractedFunction
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:116
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::simple_ilist< IRInstructionData, ilist_sentinel_tracking< true > >::iterator
ilist_iterator< OptionsT, false, false > iterator
Definition: simple_ilist.h:95
llvm::OutlinableRegion::Call
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:113
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2474
llvm::simple_ilist::erase
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:196
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1790
IPO.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3148
llvm::DenseMap< Value *, BasicBlock * >
OutlinableGroup::PHIBlocks
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:86
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:642
llvm::OutlinableRegion::CE
CodeExtractor * CE
Used to create an outlined function.
Definition: IROutliner.h:110
DIBuilder.h
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1337
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:139
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::initializeIROutlinerLegacyPassPass
void initializeIROutlinerLegacyPassPass(PassRegistry &)
SI
StandardInstrumentations SI(Debug, VerifyEach)
collectRegionsConstants
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:369
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::IRSimilarity::IRInstructionData
This provides the utilities for hashing an Instruction to an unsigned integer.
Definition: IRSimilarityIdentifier.h:113
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:754
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::detail::DenseSetImpl::clear
void clear()
Definition: DenseSet.h:92
Mangler.h
llvm::IRSimilarity::IRSimilarityCandidate::front
IRInstructionData * front() const
Definition: IRSimilarityIdentifier.h:775
OutlinableGroup::Cost
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:114
llvm::NVPTXISD::Dummy
@ Dummy
Definition: NVPTXISelLowering.h:60
llvm::IROutliner::run
bool run(Module &M)
Definition: IROutliner.cpp:2203
OutlinableGroup
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:65
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, false) INITIALIZE_PASS_END(IROutlinerLegacyPass
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::IRSimilarity::IRInstructionData::Inst
Instruction * Inst
The source Instruction that is being wrapped.
Definition: IRSimilarityIdentifier.h:117
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
llvm::OutlinableRegion::splitCandidate
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:174
constantMatches
static Optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:301
llvm::createIROutlinerPass
ModulePass * createIROutlinerPass()
createIROutlinerPass - This pass finds similar code regions and factors those regions out into functi...
Definition: IROutliner.cpp:2284
replaceCalledFunction
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
Definition: IROutliner.cpp:964
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
OutlinableGroup::Benefit
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:111
llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
llvm::RegionBase::begin
iterator begin()
Definition: RegionInfo.h:560
nextIRInstructionDataMatchesNextInst
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
Definition: IROutliner.cpp:1564
findCostForOutputBlocks
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
Definition: IROutliner.cpp:1744
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::OutlinableRegion::IgnoreRegion
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition: IROutliner.h:123
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
OutlinableGroup::IgnoreGroup
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:79
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
NoCostModel
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
getSubprogramOrNull
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:439
Attributes.h
llvm::Function::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:564
replaceConstants
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
Definition: IROutliner.cpp:1174
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::GlobalValue::hasLinkOnceODRLinkage
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:442
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1784
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:756
llvm::Function::back
const BasicBlock & back() const
Definition: Function.h:732
llvm::OutlinableRegion
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:64
IRSimilarityIdentifier.h
llvm::IRSimilarity::IRSimilarityCandidate::getStartIdx
unsigned getStartIdx() const
Definition: IRSimilarityIdentifier.h:769
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:100
PassManager.h
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:36
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:177
llvm::IRSimilarity::IRSimilarityCandidate
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
Definition: IRSimilarityIdentifier.h:549
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
getCodeExtractorArguments
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:671
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
Dominators.h
OutlinableGroup::SwiftErrorArgument
Optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:118
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:87
TargetTransformInfo.h
llvm::Function::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:536
llvm::PHINode
Definition: Instructions.h:2657
llvm::DISubprogram
Subprogram description.
Definition: DebugInfoMetadata.h:1826
moveFunctionData
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:526
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::IROutliner
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:180
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::IRSimilarity::IRSimilarityIdentifier
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Definition: IRSimilarityIdentifier.h:883
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::simple_ilist::insert
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:159
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3236
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4330
llvm::cl::desc
Definition: CommandLine.h:412
llvm::Region
Definition: RegionInfo.h:889
llvm::SetVector< Value * >
CU
Definition: AArch64AsmBackend.cpp:504
OutlinableGroup::BranchesToOutside
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:107
InitializePasses.h
iroutliner
iroutliner
Definition: IROutliner.cpp:2281
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::OutlinableRegion::Candidate
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:66
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1198
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
createSwitchStatement
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
Definition: IROutliner.cpp:1391
llvm::Function::size
size_t size() const
Definition: Function.h:728
llvm::RegionBase::getParent
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
Outliner
IR Outliner
Definition: IROutliner.cpp:2281
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38