LLVM 18.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
27#include <optional>
28#include <vector>
29
30#define DEBUG_TYPE "iroutliner"
31
32using namespace llvm;
33using namespace IRSimilarity;
34
35// A command flag to be used for debugging to exclude branches from similarity
36// matching and outlining.
37namespace llvm {
39
40// A command flag to be used for debugging to indirect calls from similarity
41// matching and outlining.
43
44// A command flag to be used for debugging to exclude intrinsics from similarity
45// matching and outlining.
47
48} // namespace llvm
49
50// Set to true if the user wants the ir outliner to run on linkonceodr linkage
51// functions. This is false by default because the linker can dedupe linkonceodr
52// functions. Since the outliner is confined to a single module (modulo LTO),
53// this is off by default. It should, however, be the default behavior in
54// LTO.
56 "enable-linkonceodr-ir-outlining", cl::Hidden,
57 cl::desc("Enable the IR outliner on linkonceodr functions"),
58 cl::init(false));
59
60// This is a debug option to test small pieces of code to ensure that outlining
61// works correctly.
63 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
64 cl::desc("Debug option to outline greedily, without restriction that "
65 "calculated benefit outweighs cost"));
66
67/// The OutlinableGroup holds all the overarching information for outlining
68/// a set of regions that are structurally similar to one another, such as the
69/// types of the overall function, the output blocks, the sets of stores needed
70/// and a list of the different regions. This information is used in the
71/// deduplication of extracted regions with the same structure.
73 /// The sections that could be outlined
74 std::vector<OutlinableRegion *> Regions;
75
76 /// The argument types for the function created as the overall function to
77 /// replace the extracted function for each region.
78 std::vector<Type *> ArgumentTypes;
79 /// The FunctionType for the overall function.
81 /// The Function for the collective overall function.
83
84 /// Flag for whether we should not consider this group of OutlinableRegions
85 /// for extraction.
86 bool IgnoreGroup = false;
87
88 /// The return blocks for the overall function.
90
91 /// The PHIBlocks with their corresponding return block based on the return
92 /// value as the key.
94
95 /// A set containing the different GVN store sets needed. Each array contains
96 /// a sorted list of the different values that need to be stored into output
97 /// registers.
99
100 /// Flag for whether the \ref ArgumentTypes have been defined after the
101 /// extraction of the first region.
102 bool InputTypesSet = false;
103
104 /// The number of input values in \ref ArgumentTypes. Anything after this
105 /// index in ArgumentTypes is an output argument.
106 unsigned NumAggregateInputs = 0;
107
108 /// The mapping of the canonical numbering of the values in outlined sections
109 /// to specific arguments.
111
112 /// The number of branches in the region target a basic block that is outside
113 /// of the region.
114 unsigned BranchesToOutside = 0;
115
116 /// Tracker counting backwards from the highest unsigned value possible to
117 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
118 /// -2 and -1 are assigned by the DenseMap.
119 unsigned PHINodeGVNTracker = -3;
120
122 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
125
126 /// The number of instructions that will be outlined by extracting \ref
127 /// Regions.
129 /// The number of added instructions needed for the outlining of the \ref
130 /// Regions.
132
133 /// The argument that needs to be marked with the swifterr attribute. If not
134 /// needed, there is no value.
135 std::optional<unsigned> SwiftErrorArgument;
136
137 /// For the \ref Regions, we look at every Value. If it is a constant,
138 /// we check whether it is the same in Region.
139 ///
140 /// \param [in,out] NotSame contains the global value numbers where the
141 /// constant is not always the same, and must be passed in as an argument.
143
144 /// For the regions, look at each set of GVN stores needed and account for
145 /// each combination. Add an argument to the argument types if there is
146 /// more than one combination.
147 ///
148 /// \param [in] M - The module we are outlining from.
150};
151
152/// Move the contents of \p SourceBB to before the last instruction of \p
153/// TargetBB.
154/// \param SourceBB - the BasicBlock to pull Instructions from.
155/// \param TargetBB - the BasicBlock to put Instruction into.
156static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
157 for (Instruction &I : llvm::make_early_inc_range(SourceBB))
158 I.moveBeforePreserving(TargetBB, TargetBB.end());
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(*CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
195 return FoundValueOpt.value_or(nullptr);
196}
197
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = BB->getFirstNonPHI();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
222 BasicBlock *Replace,
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis()) {
225 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
226 ++Idx) {
227 // Check if the incoming block is included in the set of blocks being
228 // outlined.
229 BasicBlock *Incoming = PN.getIncomingBlock(Idx);
230 if (!Included.contains(Incoming))
231 continue;
232
233 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
234 assert(BI && "Not a branch instruction?");
235 // Look over the branching instructions into this block to see if we
236 // used to branch to Find in this outlined block.
237 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
238 Succ++) {
239 // If we have found the block to replace, we do so here.
240 if (BI->getSuccessor(Succ) != Find)
241 continue;
242 BI->setSuccessor(Succ, Replace);
243 }
244 }
245 }
246}
247
248
250 assert(!CandidateSplit && "Candidate already split!");
251
253
254 Instruction *EndInst = nullptr;
255 // Check whether the last instruction is a terminator, if it is, we do
256 // not split on the following instruction. We leave the block as it is. We
257 // also check that this is not the last instruction in the Module, otherwise
258 // the check for whether the current following instruction matches the
259 // previously recorded instruction will be incorrect.
260 if (!BackInst->isTerminator() ||
261 BackInst->getParent() != &BackInst->getFunction()->back()) {
262 EndInst = Candidate->end()->Inst;
263 assert(EndInst && "Expected an end instruction?");
264 }
265
266 // We check if the current instruction following the last instruction in the
267 // region is the same as the recorded instruction following the last
268 // instruction. If they do not match, there could be problems in rewriting
269 // the program after outlining, so we ignore it.
270 if (!BackInst->isTerminator() &&
271 EndInst != BackInst->getNextNonDebugInstruction())
272 return;
273
274 Instruction *StartInst = (*Candidate->begin()).Inst;
275 assert(StartInst && "Expected a start instruction?");
276 StartBB = StartInst->getParent();
277 PrevBB = StartBB;
278
281
282 // We iterate over the instructions in the region, if we find a PHINode, we
283 // check if there are predecessors outside of the region, if there are,
284 // we ignore this region since we are unable to handle the severing of the
285 // phi node right now.
286
287 // TODO: Handle extraneous inputs for PHINodes through variable number of
288 // inputs, similar to how outputs are handled.
289 BasicBlock::iterator It = StartInst->getIterator();
290 EndBB = BackInst->getParent();
291 BasicBlock *IBlock;
292 BasicBlock *PHIPredBlock = nullptr;
293 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
294 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
295 unsigned NumPredsOutsideRegion = 0;
296 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
297 if (!BBSet.contains(PN->getIncomingBlock(i))) {
298 PHIPredBlock = PN->getIncomingBlock(i);
299 ++NumPredsOutsideRegion;
300 continue;
301 }
302
303 // We must consider the case there the incoming block to the PHINode is
304 // the same as the final block of the OutlinableRegion. If this is the
305 // case, the branch from this block must also be outlined to be valid.
306 IBlock = PN->getIncomingBlock(i);
307 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
308 PHIPredBlock = PN->getIncomingBlock(i);
309 ++NumPredsOutsideRegion;
310 }
311 }
312
313 if (NumPredsOutsideRegion > 1)
314 return;
315
316 It++;
317 }
318
319 // If the region starts with a PHINode, but is not the initial instruction of
320 // the BasicBlock, we ignore this region for now.
321 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
322 return;
323
324 // If the region ends with a PHINode, but does not contain all of the phi node
325 // instructions of the region, we ignore it for now.
326 if (isa<PHINode>(BackInst) &&
327 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
328 return;
329
330 // The basic block gets split like so:
331 // block: block:
332 // inst1 inst1
333 // inst2 inst2
334 // region1 br block_to_outline
335 // region2 block_to_outline:
336 // region3 -> region1
337 // region4 region2
338 // inst3 region3
339 // inst4 region4
340 // br block_after_outline
341 // block_after_outline:
342 // inst3
343 // inst4
344
345 std::string OriginalName = PrevBB->getName().str();
346
347 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
349 // If there was a PHINode with an incoming block outside the region,
350 // make sure is correctly updated in the newly split block.
351 if (PHIPredBlock)
353
354 CandidateSplit = true;
355 if (!BackInst->isTerminator()) {
356 EndBB = EndInst->getParent();
357 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
360 } else {
361 EndBB = BackInst->getParent();
362 EndsInBranch = true;
363 FollowBB = nullptr;
364 }
365
366 // Refind the basic block set.
367 BBSet.clear();
369 // For the phi nodes in the new starting basic block of the region, we
370 // reassign the targets of the basic blocks branching instructions.
372 if (FollowBB)
374}
375
377 assert(CandidateSplit && "Candidate is not split!");
378
379 // The basic block gets reattached like so:
380 // block: block:
381 // inst1 inst1
382 // inst2 inst2
383 // br block_to_outline region1
384 // block_to_outline: -> region2
385 // region1 region3
386 // region2 region4
387 // region3 inst3
388 // region4 inst4
389 // br block_after_outline
390 // block_after_outline:
391 // inst3
392 // inst4
393 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
394
395 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
396 // Make sure PHINode references to the block we are merging into are
397 // updated to be incoming blocks from the predecessor to the current block.
398
399 // NOTE: If this is updated such that the outlined block can have more than
400 // one incoming block to a PHINode, this logic will have to updated
401 // to handle multiple precessors instead.
402
403 // We only need to update this if the outlined section contains a PHINode, if
404 // it does not, then the incoming block was never changed in the first place.
405 // On the other hand, if PrevBB has no predecessors, it means that all
406 // incoming blocks to the first block are contained in the region, and there
407 // will be nothing to update.
408 Instruction *StartInst = (*Candidate->begin()).Inst;
409 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
411 "PrevBB has more than one predecessor. Should be 0 or 1.");
412 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
414 }
416
417 // If we reattaching after outlining, we iterate over the phi nodes to
418 // the initial block, and reassign the branch instructions of the incoming
419 // blocks to the block we are remerging into.
420 if (!ExtractedFunction) {
423
425 if (!EndsInBranch)
427 }
428
430
431 BasicBlock *PlacementBB = PrevBB;
432 if (StartBB != EndBB)
433 PlacementBB = EndBB;
434 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
435 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
436 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
437 PlacementBB->getTerminator()->eraseFromParent();
438 moveBBContents(*FollowBB, *PlacementBB);
439 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
441 }
442
445
446 // Make sure to save changes back to the StartBB.
447 StartBB = PrevBB;
448 EndBB = nullptr;
449 PrevBB = nullptr;
450 FollowBB = nullptr;
451
452 CandidateSplit = false;
453}
454
455/// Find whether \p V matches the Constants previously found for the \p GVN.
456///
457/// \param V - The value to check for consistency.
458/// \param GVN - The global value number assigned to \p V.
459/// \param GVNToConstant - The mapping of global value number to Constants.
460/// \returns true if the Value matches the Constant mapped to by V and false if
461/// it \p V is a Constant but does not match.
462/// \returns std::nullopt if \p V is not a Constant.
463static std::optional<bool>
464constantMatches(Value *V, unsigned GVN,
465 DenseMap<unsigned, Constant *> &GVNToConstant) {
466 // See if we have a constants
467 Constant *CST = dyn_cast<Constant>(V);
468 if (!CST)
469 return std::nullopt;
470
471 // Holds a mapping from a global value number to a Constant.
473 bool Inserted;
474
475
476 // If we have a constant, try to make a new entry in the GVNToConstant.
477 std::tie(GVNToConstantIt, Inserted) =
478 GVNToConstant.insert(std::make_pair(GVN, CST));
479 // If it was found and is not equal, it is not the same. We do not
480 // handle this case yet, and exit early.
481 if (Inserted || (GVNToConstantIt->second == CST))
482 return true;
483
484 return false;
485}
486
488 InstructionCost Benefit = 0;
489
490 // Estimate the benefit of outlining a specific sections of the program. We
491 // delegate mostly this task to the TargetTransformInfo so that if the target
492 // has specific changes, we can have a more accurate estimate.
493
494 // However, getInstructionCost delegates the code size calculation for
495 // arithmetic instructions to getArithmeticInstrCost in
496 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
497 // code size for a division and remainder instruction to be equal to 4, and
498 // everything else to 1. This is not an accurate representation of the
499 // division instruction for targets that have a native division instruction.
500 // To be overly conservative, we only add 1 to the number of instructions for
501 // each division instruction.
502 for (IRInstructionData &ID : *Candidate) {
503 Instruction *I = ID.Inst;
504 switch (I->getOpcode()) {
505 case Instruction::FDiv:
506 case Instruction::FRem:
507 case Instruction::SDiv:
508 case Instruction::SRem:
509 case Instruction::UDiv:
510 case Instruction::URem:
511 Benefit += 1;
512 break;
513 default:
515 break;
516 }
517 }
518
519 return Benefit;
520}
521
522/// Check the \p OutputMappings structure for value \p Input, if it exists
523/// it has been used as an output for outlining, and has been renamed, and we
524/// return the new value, otherwise, we return the same value.
525///
526/// \param OutputMappings [in] - The mapping of values to their renamed value
527/// after being used as an output for an outlined region.
528/// \param Input [in] - The value to find the remapped value of, if it exists.
529/// \return The remapped value if it has been renamed, and the same value if has
530/// not.
532 Value *Input) {
534 OutputMappings.find(Input);
535 if (OutputMapping != OutputMappings.end())
536 return OutputMapping->second;
537 return Input;
538}
539
540/// Find whether \p Region matches the global value numbering to Constant
541/// mapping found so far.
542///
543/// \param Region - The OutlinableRegion we are checking for constants
544/// \param GVNToConstant - The mapping of global value number to Constants.
545/// \param NotSame - The set of global value numbers that do not have the same
546/// constant in each region.
547/// \returns true if all Constants are the same in every use of a Constant in \p
548/// Region and false if not
549static bool
551 DenseMap<unsigned, Constant *> &GVNToConstant,
552 DenseSet<unsigned> &NotSame) {
553 bool ConstantsTheSame = true;
554
555 IRSimilarityCandidate &C = *Region.Candidate;
556 for (IRInstructionData &ID : C) {
557
558 // Iterate over the operands in an instruction. If the global value number,
559 // assigned by the IRSimilarityCandidate, has been seen before, we check if
560 // the number has been found to be not the same value in each instance.
561 for (Value *V : ID.OperVals) {
562 std::optional<unsigned> GVNOpt = C.getGVN(V);
563 assert(GVNOpt && "Expected a GVN for operand?");
564 unsigned GVN = *GVNOpt;
565
566 // Check if this global value has been found to not be the same already.
567 if (NotSame.contains(GVN)) {
568 if (isa<Constant>(V))
569 ConstantsTheSame = false;
570 continue;
571 }
572
573 // If it has been the same so far, we check the value for if the
574 // associated Constant value match the previous instances of the same
575 // global value number. If the global value does not map to a Constant,
576 // it is considered to not be the same value.
577 std::optional<bool> ConstantMatches =
578 constantMatches(V, GVN, GVNToConstant);
579 if (ConstantMatches) {
580 if (*ConstantMatches)
581 continue;
582 else
583 ConstantsTheSame = false;
584 }
585
586 // While this value is a register, it might not have been previously,
587 // make sure we don't already have a constant mapped to this global value
588 // number.
589 if (GVNToConstant.contains(GVN))
590 ConstantsTheSame = false;
591
592 NotSame.insert(GVN);
593 }
594 }
595
596 return ConstantsTheSame;
597}
598
600 DenseMap<unsigned, Constant *> GVNToConstant;
601
602 for (OutlinableRegion *Region : Regions)
603 collectRegionsConstants(*Region, GVNToConstant, NotSame);
604}
605
607 for (OutlinableRegion *OS : Regions)
608 OutputGVNCombinations.insert(OS->GVNStores);
609
610 // We are adding an extracted argument to decide between which output path
611 // to use in the basic block. It is used in a switch statement and only
612 // needs to be an integer.
613 if (OutputGVNCombinations.size() > 1)
614 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
615}
616
617/// Get the subprogram if it exists for one of the outlined regions.
618///
619/// \param [in] Group - The set of regions to find a subprogram for.
620/// \returns the subprogram if it exists, or nullptr.
622 for (OutlinableRegion *OS : Group.Regions)
623 if (Function *F = OS->Call->getFunction())
624 if (DISubprogram *SP = F->getSubprogram())
625 return SP;
626
627 return nullptr;
628}
629
630Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
631 unsigned FunctionNameSuffix) {
632 assert(!Group.OutlinedFunction && "Function is already defined!");
633
634 Type *RetTy = Type::getVoidTy(M.getContext());
635 // All extracted functions _should_ have the same return type at this point
636 // since the similarity identifier ensures that all branches outside of the
637 // region occur in the same place.
638
639 // NOTE: Should we ever move to the model that uses a switch at every point
640 // needed, meaning that we could branch within the region or out, it is
641 // possible that we will need to switch to using the most general case all of
642 // the time.
643 for (OutlinableRegion *R : Group.Regions) {
644 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
645 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
646 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
647 RetTy = ExtractedFuncType;
648 }
649
651 RetTy, Group.ArgumentTypes, false);
652
653 // These functions will only be called from within the same module, so
654 // we can set an internal linkage.
657 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
658
659 // Transfer the swifterr attribute to the correct function parameter.
660 if (Group.SwiftErrorArgument)
662 Attribute::SwiftError);
663
664 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
665 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
666
667 // If there's a DISubprogram associated with this outlined function, then
668 // emit debug info for the outlined function.
669 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
670 Function *F = Group.OutlinedFunction;
671 // We have a DISubprogram. Get its DICompileUnit.
672 DICompileUnit *CU = SP->getUnit();
673 DIBuilder DB(M, true, CU);
674 DIFile *Unit = SP->getFile();
675 Mangler Mg;
676 // Get the mangled name of the function for the linkage name.
677 std::string Dummy;
678 llvm::raw_string_ostream MangledNameStream(Dummy);
679 Mg.getNameWithPrefix(MangledNameStream, F, false);
680
681 DISubprogram *OutlinedSP = DB.createFunction(
682 Unit /* Context */, F->getName(), MangledNameStream.str(),
683 Unit /* File */,
684 0 /* Line 0 is reserved for compiler-generated code. */,
685 DB.createSubroutineType(
686 DB.getOrCreateTypeArray(std::nullopt)), /* void type */
687 0, /* Line 0 is reserved for compiler-generated code. */
688 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
689 /* Outlined code is optimized code by definition. */
690 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
691
692 // Don't add any new variables to the subprogram.
693 DB.finalizeSubprogram(OutlinedSP);
694
695 // Attach subprogram to the function.
696 F->setSubprogram(OutlinedSP);
697 // We're done with the DIBuilder.
698 DB.finalize();
699 }
700
701 return Group.OutlinedFunction;
702}
703
704/// Move each BasicBlock in \p Old to \p New.
705///
706/// \param [in] Old - The function to move the basic blocks from.
707/// \param [in] New - The function to move the basic blocks to.
708/// \param [out] NewEnds - The return blocks of the new overall function.
709static void moveFunctionData(Function &Old, Function &New,
711 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
712 CurrBB.removeFromParent();
713 CurrBB.insertInto(&New);
714 Instruction *I = CurrBB.getTerminator();
715
716 // For each block we find a return instruction is, it is a potential exit
717 // path for the function. We keep track of each block based on the return
718 // value here.
719 if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
720 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
721
722 std::vector<Instruction *> DebugInsts;
723
724 for (Instruction &Val : CurrBB) {
725 // We must handle the scoping of called functions differently than
726 // other outlined instructions.
727 if (!isa<CallInst>(&Val)) {
728 // Remove the debug information for outlined functions.
729 Val.setDebugLoc(DebugLoc());
730
731 // Loop info metadata may contain line locations. Update them to have no
732 // value in the new subprogram since the outlined code could be from
733 // several locations.
734 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
735 if (DISubprogram *SP = New.getSubprogram())
736 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
737 return DILocation::get(New.getContext(), Loc->getLine(),
738 Loc->getColumn(), SP, nullptr);
739 return MD;
740 };
741 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
742 continue;
743 }
744
745 // From this point we are only handling call instructions.
746 CallInst *CI = cast<CallInst>(&Val);
747
748 // We add any debug statements here, to be removed after. Since the
749 // instructions originate from many different locations in the program,
750 // it will cause incorrect reporting from a debugger if we keep the
751 // same debug instructions.
752 if (isa<DbgInfoIntrinsic>(CI)) {
753 DebugInsts.push_back(&Val);
754 continue;
755 }
756
757 // Edit the scope of called functions inside of outlined functions.
758 if (DISubprogram *SP = New.getSubprogram()) {
759 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
760 Val.setDebugLoc(DI);
761 }
762 }
763
764 for (Instruction *I : DebugInsts)
765 I->eraseFromParent();
766 }
767}
768
769/// Find the constants that will need to be lifted into arguments
770/// as they are not the same in each instance of the region.
771///
772/// \param [in] C - The IRSimilarityCandidate containing the region we are
773/// analyzing.
774/// \param [in] NotSame - The set of global value numbers that do not have a
775/// single Constant across all OutlinableRegions similar to \p C.
776/// \param [out] Inputs - The list containing the global value numbers of the
777/// arguments needed for the region of code.
779 std::vector<unsigned> &Inputs) {
781 // Iterate over the instructions, and find what constants will need to be
782 // extracted into arguments.
783 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
784 IDIt != EndIDIt; IDIt++) {
785 for (Value *V : (*IDIt).OperVals) {
786 // Since these are stored before any outlining, they will be in the
787 // global value numbering.
788 unsigned GVN = *C.getGVN(V);
789 if (isa<Constant>(V))
790 if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
791 Inputs.push_back(GVN);
792 Seen.insert(GVN);
793 }
794 }
795 }
796}
797
798/// Find the GVN for the inputs that have been found by the CodeExtractor.
799///
800/// \param [in] C - The IRSimilarityCandidate containing the region we are
801/// analyzing.
802/// \param [in] CurrentInputs - The set of inputs found by the
803/// CodeExtractor.
804/// \param [in] OutputMappings - The mapping of values that have been replaced
805/// by a new output value.
806/// \param [out] EndInputNumbers - The global value numbers for the extracted
807/// arguments.
809 SetVector<Value *> &CurrentInputs,
810 const DenseMap<Value *, Value *> &OutputMappings,
811 std::vector<unsigned> &EndInputNumbers) {
812 // Get the Global Value Number for each input. We check if the Value has been
813 // replaced by a different value at output, and use the original value before
814 // replacement.
815 for (Value *Input : CurrentInputs) {
816 assert(Input && "Have a nullptr as an input");
817 if (OutputMappings.contains(Input))
818 Input = OutputMappings.find(Input)->second;
819 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
820 EndInputNumbers.push_back(*C.getGVN(Input));
821 }
822}
823
824/// Find the original value for the \p ArgInput values if any one of them was
825/// replaced during a previous extraction.
826///
827/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
828/// \param [in] OutputMappings - The mapping of values that have been replaced
829/// by a new output value.
830/// \param [out] RemappedArgInputs - The remapped values according to
831/// \p OutputMappings that will be extracted.
832static void
834 const DenseMap<Value *, Value *> &OutputMappings,
835 SetVector<Value *> &RemappedArgInputs) {
836 // Get the global value number for each input that will be extracted as an
837 // argument by the code extractor, remapping if needed for reloaded values.
838 for (Value *Input : ArgInputs) {
839 if (OutputMappings.contains(Input))
840 Input = OutputMappings.find(Input)->second;
841 RemappedArgInputs.insert(Input);
842 }
843}
844
845/// Find the input GVNs and the output values for a region of Instructions.
846/// Using the code extractor, we collect the inputs to the extracted function.
847///
848/// The \p Region can be identified as needing to be ignored in this function.
849/// It should be checked whether it should be ignored after a call to this
850/// function.
851///
852/// \param [in,out] Region - The region of code to be analyzed.
853/// \param [out] InputGVNs - The global value numbers for the extracted
854/// arguments.
855/// \param [in] NotSame - The global value numbers in the region that do not
856/// have the same constant value in the regions structurally similar to
857/// \p Region.
858/// \param [in] OutputMappings - The mapping of values that have been replaced
859/// by a new output value after extraction.
860/// \param [out] ArgInputs - The values of the inputs to the extracted function.
861/// \param [out] Outputs - The set of values extracted by the CodeExtractor
862/// as outputs.
864 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
865 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
866 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
867 IRSimilarityCandidate &C = *Region.Candidate;
868
869 // OverallInputs are the inputs to the region found by the CodeExtractor,
870 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
871 // allocas of values whose lifetimes are contained completely within the
872 // outlined region. PremappedInputs are the arguments found by the
873 // CodeExtractor, removing conditions such as sunken allocas, but that
874 // may need to be remapped due to the extracted output values replacing
875 // the original values. We use DummyOutputs for this first run of finding
876 // inputs and outputs since the outputs could change during findAllocas,
877 // the correct set of extracted outputs will be in the final Outputs ValueSet.
878 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
879 DummyOutputs;
880
881 // Use the code extractor to get the inputs and outputs, without sunken
882 // allocas or removing llvm.assumes.
883 CodeExtractor *CE = Region.CE;
884 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
885 assert(Region.StartBB && "Region must have a start BasicBlock!");
886 Function *OrigF = Region.StartBB->getParent();
887 CodeExtractorAnalysisCache CEAC(*OrigF);
888 BasicBlock *Dummy = nullptr;
889
890 // The region may be ineligible due to VarArgs in the parent function. In this
891 // case we ignore the region.
892 if (!CE->isEligible()) {
893 Region.IgnoreRegion = true;
894 return;
895 }
896
897 // Find if any values are going to be sunk into the function when extracted
898 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
899 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
900
901 // TODO: Support regions with sunken allocas: values whose lifetimes are
902 // contained completely within the outlined region. These are not guaranteed
903 // to be the same in every region, so we must elevate them all to arguments
904 // when they appear. If these values are not equal, it means there is some
905 // Input in OverallInputs that was removed for ArgInputs.
906 if (OverallInputs.size() != PremappedInputs.size()) {
907 Region.IgnoreRegion = true;
908 return;
909 }
910
911 findConstants(C, NotSame, InputGVNs);
912
913 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
914
915 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
916 ArgInputs);
917
918 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
919 // we need to make sure they are in a deterministic order.
920 stable_sort(InputGVNs);
921}
922
923/// Look over the inputs and map each input argument to an argument in the
924/// overall function for the OutlinableRegions. This creates a way to replace
925/// the arguments of the extracted function with the arguments of the new
926/// overall function.
927///
928/// \param [in,out] Region - The region of code to be analyzed.
929/// \param [in] InputGVNs - The global value numbering of the input values
930/// collected.
931/// \param [in] ArgInputs - The values of the arguments to the extracted
932/// function.
933static void
935 std::vector<unsigned> &InputGVNs,
936 SetVector<Value *> &ArgInputs) {
937
938 IRSimilarityCandidate &C = *Region.Candidate;
939 OutlinableGroup &Group = *Region.Parent;
940
941 // This counts the argument number in the overall function.
942 unsigned TypeIndex = 0;
943
944 // This counts the argument number in the extracted function.
945 unsigned OriginalIndex = 0;
946
947 // Find the mapping of the extracted arguments to the arguments for the
948 // overall function. Since there may be extra arguments in the overall
949 // function to account for the extracted constants, we have two different
950 // counters as we find extracted arguments, and as we come across overall
951 // arguments.
952
953 // Additionally, in our first pass, for the first extracted function,
954 // we find argument locations for the canonical value numbering. This
955 // numbering overrides any discovered location for the extracted code.
956 for (unsigned InputVal : InputGVNs) {
957 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
958 assert(CanonicalNumberOpt && "Canonical number not found?");
959 unsigned CanonicalNumber = *CanonicalNumberOpt;
960
961 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
962 assert(InputOpt && "Global value number not found?");
963 Value *Input = *InputOpt;
964
966 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
967
968 if (!Group.InputTypesSet) {
969 Group.ArgumentTypes.push_back(Input->getType());
970 // If the input value has a swifterr attribute, make sure to mark the
971 // argument in the overall function.
972 if (Input->isSwiftError()) {
973 assert(
974 !Group.SwiftErrorArgument &&
975 "Argument already marked with swifterr for this OutlinableGroup!");
976 Group.SwiftErrorArgument = TypeIndex;
977 }
978 }
979
980 // Check if we have a constant. If we do add it to the overall argument
981 // number to Constant map for the region, and continue to the next input.
982 if (Constant *CST = dyn_cast<Constant>(Input)) {
983 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
984 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
985 else {
987 std::make_pair(CanonicalNumber, TypeIndex));
988 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
989 }
990 TypeIndex++;
991 continue;
992 }
993
994 // It is not a constant, we create the mapping from extracted argument list
995 // to the overall argument list, using the canonical location, if it exists.
996 assert(ArgInputs.count(Input) && "Input cannot be found!");
997
998 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
999 if (OriginalIndex != AggArgIt->second)
1000 Region.ChangedArgOrder = true;
1001 Region.ExtractedArgToAgg.insert(
1002 std::make_pair(OriginalIndex, AggArgIt->second));
1003 Region.AggArgToExtracted.insert(
1004 std::make_pair(AggArgIt->second, OriginalIndex));
1005 } else {
1007 std::make_pair(CanonicalNumber, TypeIndex));
1008 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
1009 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
1010 }
1011 OriginalIndex++;
1012 TypeIndex++;
1013 }
1014
1015 // If the function type definitions for the OutlinableGroup holding the region
1016 // have not been set, set the length of the inputs here. We should have the
1017 // same inputs for all of the different regions contained in the
1018 // OutlinableGroup since they are all structurally similar to one another.
1019 if (!Group.InputTypesSet) {
1020 Group.NumAggregateInputs = TypeIndex;
1021 Group.InputTypesSet = true;
1022 }
1023
1024 Region.NumExtractedInputs = OriginalIndex;
1025}
1026
1027/// Check if the \p V has any uses outside of the region other than \p PN.
1028///
1029/// \param V [in] - The value to check.
1030/// \param PHILoc [in] - The location in the PHINode of \p V.
1031/// \param PN [in] - The PHINode using \p V.
1032/// \param Exits [in] - The potential blocks we exit to from the outlined
1033/// region.
1034/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1035/// \returns true if \p V has any use soutside its region other than \p PN.
1036static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1038 DenseSet<BasicBlock *> &BlocksInRegion) {
1039 // We check to see if the value is used by the PHINode from some other
1040 // predecessor not included in the region. If it is, we make sure
1041 // to keep it as an output.
1042 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1043 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1044 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1045 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1046 }))
1047 return true;
1048
1049 // Check if the value is used by any other instructions outside the region.
1050 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1051 Instruction *I = dyn_cast<Instruction>(U);
1052 if (!I)
1053 return false;
1054
1055 // If the use of the item is inside the region, we skip it. Uses
1056 // inside the region give us useful information about how the item could be
1057 // used as an output.
1058 BasicBlock *Parent = I->getParent();
1059 if (BlocksInRegion.contains(Parent))
1060 return false;
1061
1062 // If it's not a PHINode then we definitely know the use matters. This
1063 // output value will not completely combined with another item in a PHINode
1064 // as it is directly reference by another non-phi instruction
1065 if (!isa<PHINode>(I))
1066 return true;
1067
1068 // If we have a PHINode outside one of the exit locations, then it
1069 // can be considered an outside use as well. If there is a PHINode
1070 // contained in the Exit where this values use matters, it will be
1071 // caught when we analyze that PHINode.
1072 if (!Exits.contains(Parent))
1073 return true;
1074
1075 return false;
1076 });
1077}
1078
1079/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1080/// considered outputs. A PHINodes is an output when more than one incoming
1081/// value has been marked by the CodeExtractor as an output.
1082///
1083/// \param CurrentExitFromRegion [in] - The block to analyze.
1084/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1085/// region.
1086/// \param RegionBlocks [in] - The basic blocks in the region.
1087/// \param Outputs [in, out] - The existing outputs for the region, we may add
1088/// PHINodes to this as we find that they replace output values.
1089/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1090/// totally replaced by a PHINode.
1091/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1092/// in PHINodes, but have other uses, and should still be considered outputs.
1094 BasicBlock *CurrentExitFromRegion,
1095 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1096 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1097 DenseSet<Value *> &OutputsReplacedByPHINode,
1098 DenseSet<Value *> &OutputsWithNonPhiUses) {
1099 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1100 // Find all incoming values from the outlining region.
1101 SmallVector<unsigned, 2> IncomingVals;
1102 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1103 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1104 IncomingVals.push_back(I);
1105
1106 // Do not process PHI if there are no predecessors from region.
1107 unsigned NumIncomingVals = IncomingVals.size();
1108 if (NumIncomingVals == 0)
1109 continue;
1110
1111 // If there is one predecessor, we mark it as a value that needs to be kept
1112 // as an output.
1113 if (NumIncomingVals == 1) {
1114 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1115 OutputsWithNonPhiUses.insert(V);
1116 OutputsReplacedByPHINode.erase(V);
1117 continue;
1118 }
1119
1120 // This PHINode will be used as an output value, so we add it to our list.
1121 Outputs.insert(&PN);
1122
1123 // Not all of the incoming values should be ignored as other inputs and
1124 // outputs may have uses in outlined region. If they have other uses
1125 // outside of the single PHINode we should not skip over it.
1126 for (unsigned Idx : IncomingVals) {
1127 Value *V = PN.getIncomingValue(Idx);
1128 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1129 OutputsWithNonPhiUses.insert(V);
1130 OutputsReplacedByPHINode.erase(V);
1131 continue;
1132 }
1133 if (!OutputsWithNonPhiUses.contains(V))
1134 OutputsReplacedByPHINode.insert(V);
1135 }
1136 }
1137}
1138
1139// Represents the type for the unsigned number denoting the output number for
1140// phi node, along with the canonical number for the exit block.
1141using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1142// The list of canonical numbers for the incoming values to a PHINode.
1144// The pair type representing the set of canonical values being combined in the
1145// PHINode, along with the location data for the PHINode.
1146using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1147
1148/// Encode \p PND as an integer for easy lookup based on the argument location,
1149/// the parent BasicBlock canonical numbering, and the canonical numbering of
1150/// the values stored in the PHINode.
1151///
1152/// \param PND - The data to hash.
1153/// \returns The hash code of \p PND.
1155 return llvm::hash_combine(
1156 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1157 llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1158}
1159
1160/// Create a special GVN for PHINodes that will be used outside of
1161/// the region. We create a hash code based on the Canonical number of the
1162/// parent BasicBlock, the canonical numbering of the values stored in the
1163/// PHINode and the aggregate argument location. This is used to find whether
1164/// this PHINode type has been given a canonical numbering already. If not, we
1165/// assign it a value and store it for later use. The value is returned to
1166/// identify different output schemes for the set of regions.
1167///
1168/// \param Region - The region that \p PN is an output for.
1169/// \param PN - The PHINode we are analyzing.
1170/// \param Blocks - The blocks for the region we are analyzing.
1171/// \param AggArgIdx - The argument \p PN will be stored into.
1172/// \returns An optional holding the assigned canonical number, or std::nullopt
1173/// if there is some attribute of the PHINode blocking it from being used.
1174static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1175 PHINode *PN,
1177 unsigned AggArgIdx) {
1178 OutlinableGroup &Group = *Region.Parent;
1179 IRSimilarityCandidate &Cand = *Region.Candidate;
1180 BasicBlock *PHIBB = PN->getParent();
1181 CanonList PHIGVNs;
1182 Value *Incoming;
1183 BasicBlock *IncomingBlock;
1184 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1185 Incoming = PN->getIncomingValue(Idx);
1186 IncomingBlock = PN->getIncomingBlock(Idx);
1187 // If we cannot find a GVN, and the incoming block is included in the region
1188 // this means that the input to the PHINode is not included in the region we
1189 // are trying to analyze, meaning, that if it was outlined, we would be
1190 // adding an extra input. We ignore this case for now, and so ignore the
1191 // region.
1192 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1193 if (!OGVN && Blocks.contains(IncomingBlock)) {
1194 Region.IgnoreRegion = true;
1195 return std::nullopt;
1196 }
1197
1198 // If the incoming block isn't in the region, we don't have to worry about
1199 // this incoming value.
1200 if (!Blocks.contains(IncomingBlock))
1201 continue;
1202
1203 // Collect the canonical numbers of the values in the PHINode.
1204 unsigned GVN = *OGVN;
1205 OGVN = Cand.getCanonicalNum(GVN);
1206 assert(OGVN && "No GVN found for incoming value?");
1207 PHIGVNs.push_back(*OGVN);
1208
1209 // Find the incoming block and use the canonical numbering as well to define
1210 // the hash for the PHINode.
1211 OGVN = Cand.getGVN(IncomingBlock);
1212
1213 // If there is no number for the incoming block, it is because we have
1214 // split the candidate basic blocks. So we use the previous block that it
1215 // was split from to find the valid global value numbering for the PHINode.
1216 if (!OGVN) {
1217 assert(Cand.getStartBB() == IncomingBlock &&
1218 "Unknown basic block used in exit path PHINode.");
1219
1220 BasicBlock *PrevBlock = nullptr;
1221 // Iterate over the predecessors to the incoming block of the
1222 // PHINode, when we find a block that is not contained in the region
1223 // we know that this is the first block that we split from, and should
1224 // have a valid global value numbering.
1225 for (BasicBlock *Pred : predecessors(IncomingBlock))
1226 if (!Blocks.contains(Pred)) {
1227 PrevBlock = Pred;
1228 break;
1229 }
1230 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1231 OGVN = Cand.getGVN(PrevBlock);
1232 }
1233 GVN = *OGVN;
1234 OGVN = Cand.getCanonicalNum(GVN);
1235 assert(OGVN && "No GVN found for incoming block?");
1236 PHIGVNs.push_back(*OGVN);
1237 }
1238
1239 // Now that we have the GVNs for the incoming values, we are going to combine
1240 // them with the GVN of the incoming bock, and the output location of the
1241 // PHINode to generate a hash value representing this instance of the PHINode.
1244 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1245 assert(BBGVN && "Could not find GVN for the incoming block!");
1246
1247 BBGVN = Cand.getCanonicalNum(*BBGVN);
1248 assert(BBGVN && "Could not find canonical number for the incoming block!");
1249 // Create a pair of the exit block canonical value, and the aggregate
1250 // argument location, connected to the canonical numbers stored in the
1251 // PHINode.
1252 PHINodeData TemporaryPair =
1253 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1254 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1255
1256 // Look for and create a new entry in our connection between canonical
1257 // numbers for PHINodes, and the set of objects we just created.
1258 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1259 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1260 bool Inserted = false;
1261 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1262 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1263 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1264 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1265 }
1266
1267 return GVNToPHIIt->second;
1268}
1269
1270/// Create a mapping of the output arguments for the \p Region to the output
1271/// arguments of the overall outlined function.
1272///
1273/// \param [in,out] Region - The region of code to be analyzed.
1274/// \param [in] Outputs - The values found by the code extractor.
1275static void
1277 SetVector<Value *> &Outputs) {
1278 OutlinableGroup &Group = *Region.Parent;
1279 IRSimilarityCandidate &C = *Region.Candidate;
1280
1282 DenseSet<BasicBlock *> BlocksInRegion;
1283 C.getBasicBlocks(BlocksInRegion, BE);
1284
1285 // Find the exits to the region.
1287 for (BasicBlock *Block : BE)
1288 for (BasicBlock *Succ : successors(Block))
1289 if (!BlocksInRegion.contains(Succ))
1290 Exits.insert(Succ);
1291
1292 // After determining which blocks exit to PHINodes, we add these PHINodes to
1293 // the set of outputs to be processed. We also check the incoming values of
1294 // the PHINodes for whether they should no longer be considered outputs.
1295 DenseSet<Value *> OutputsReplacedByPHINode;
1296 DenseSet<Value *> OutputsWithNonPhiUses;
1297 for (BasicBlock *ExitBB : Exits)
1298 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1299 OutputsReplacedByPHINode,
1300 OutputsWithNonPhiUses);
1301
1302 // This counts the argument number in the extracted function.
1303 unsigned OriginalIndex = Region.NumExtractedInputs;
1304
1305 // This counts the argument number in the overall function.
1306 unsigned TypeIndex = Group.NumAggregateInputs;
1307 bool TypeFound;
1308 DenseSet<unsigned> AggArgsUsed;
1309
1310 // Iterate over the output types and identify if there is an aggregate pointer
1311 // type whose base type matches the current output type. If there is, we mark
1312 // that we will use this output register for this value. If not we add another
1313 // type to the overall argument type list. We also store the GVNs used for
1314 // stores to identify which values will need to be moved into an special
1315 // block that holds the stores to the output registers.
1316 for (Value *Output : Outputs) {
1317 TypeFound = false;
1318 // We can do this since it is a result value, and will have a number
1319 // that is necessarily the same. BUT if in the future, the instructions
1320 // do not have to be in same order, but are functionally the same, we will
1321 // have to use a different scheme, as one-to-one correspondence is not
1322 // guaranteed.
1323 unsigned ArgumentSize = Group.ArgumentTypes.size();
1324
1325 // If the output is combined in a PHINode, we make sure to skip over it.
1326 if (OutputsReplacedByPHINode.contains(Output))
1327 continue;
1328
1329 unsigned AggArgIdx = 0;
1330 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1331 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1332 continue;
1333
1334 if (AggArgsUsed.contains(Jdx))
1335 continue;
1336
1337 TypeFound = true;
1338 AggArgsUsed.insert(Jdx);
1339 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1340 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1341 AggArgIdx = Jdx;
1342 break;
1343 }
1344
1345 // We were unable to find an unused type in the output type set that matches
1346 // the output, so we add a pointer type to the argument types of the overall
1347 // function to handle this output and create a mapping to it.
1348 if (!TypeFound) {
1349 Group.ArgumentTypes.push_back(Output->getType()->getPointerTo(
1350 M.getDataLayout().getAllocaAddrSpace()));
1351 // Mark the new pointer type as the last value in the aggregate argument
1352 // list.
1353 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1354 AggArgsUsed.insert(ArgTypeIdx);
1355 Region.ExtractedArgToAgg.insert(
1356 std::make_pair(OriginalIndex, ArgTypeIdx));
1357 Region.AggArgToExtracted.insert(
1358 std::make_pair(ArgTypeIdx, OriginalIndex));
1359 AggArgIdx = ArgTypeIdx;
1360 }
1361
1362 // TODO: Adapt to the extra input from the PHINode.
1363 PHINode *PN = dyn_cast<PHINode>(Output);
1364
1365 std::optional<unsigned> GVN;
1366 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1367 // Values outside the region can be combined into PHINode when we
1368 // have multiple exits. We collect both of these into a list to identify
1369 // which values are being used in the PHINode. Each list identifies a
1370 // different PHINode, and a different output. We store the PHINode as it's
1371 // own canonical value. These canonical values are also dependent on the
1372 // output argument it is saved to.
1373
1374 // If two PHINodes have the same canonical values, but different aggregate
1375 // argument locations, then they will have distinct Canonical Values.
1376 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1377 if (!GVN)
1378 return;
1379 } else {
1380 // If we do not have a PHINode we use the global value numbering for the
1381 // output value, to find the canonical number to add to the set of stored
1382 // values.
1383 GVN = C.getGVN(Output);
1384 GVN = C.getCanonicalNum(*GVN);
1385 }
1386
1387 // Each region has a potentially unique set of outputs. We save which
1388 // values are output in a list of canonical values so we can differentiate
1389 // among the different store schemes.
1390 Region.GVNStores.push_back(*GVN);
1391
1392 OriginalIndex++;
1393 TypeIndex++;
1394 }
1395
1396 // We sort the stored values to make sure that we are not affected by analysis
1397 // order when determining what combination of items were stored.
1398 stable_sort(Region.GVNStores);
1399}
1400
1401void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1402 DenseSet<unsigned> &NotSame) {
1403 std::vector<unsigned> Inputs;
1404 SetVector<Value *> ArgInputs, Outputs;
1405
1406 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1407 Outputs);
1408
1409 if (Region.IgnoreRegion)
1410 return;
1411
1412 // Map the inputs found by the CodeExtractor to the arguments found for
1413 // the overall function.
1415
1416 // Map the outputs found by the CodeExtractor to the arguments found for
1417 // the overall function.
1419}
1420
1421/// Replace the extracted function in the Region with a call to the overall
1422/// function constructed from the deduplicated similar regions, replacing and
1423/// remapping the values passed to the extracted function as arguments to the
1424/// new arguments of the overall function.
1425///
1426/// \param [in] M - The module to outline from.
1427/// \param [in] Region - The regions of extracted code to be replaced with a new
1428/// function.
1429/// \returns a call instruction with the replaced function.
1431 std::vector<Value *> NewCallArgs;
1433
1434 OutlinableGroup &Group = *Region.Parent;
1435 CallInst *Call = Region.Call;
1436 assert(Call && "Call to replace is nullptr?");
1437 Function *AggFunc = Group.OutlinedFunction;
1438 assert(AggFunc && "Function to replace with is nullptr?");
1439
1440 // If the arguments are the same size, there are not values that need to be
1441 // made into an argument, the argument ordering has not been change, or
1442 // different output registers to handle. We can simply replace the called
1443 // function in this case.
1444 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1445 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1446 << *AggFunc << " with same number of arguments\n");
1447 Call->setCalledFunction(AggFunc);
1448 return Call;
1449 }
1450
1451 // We have a different number of arguments than the new function, so
1452 // we need to use our previously mappings off extracted argument to overall
1453 // function argument, and constants to overall function argument to create the
1454 // new argument list.
1455 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1456
1457 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1458 Group.OutputGVNCombinations.size() > 1) {
1459 // If we are on the last argument, and we need to differentiate between
1460 // output blocks, add an integer to the argument list to determine
1461 // what block to take
1462 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1463 << Region.OutputBlockNum << "\n");
1464 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1465 Region.OutputBlockNum));
1466 continue;
1467 }
1468
1469 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1470 if (ArgPair != Region.AggArgToExtracted.end()) {
1471 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1472 // If we found the mapping from the extracted function to the overall
1473 // function, we simply add it to the argument list. We use the same
1474 // value, it just needs to honor the new order of arguments.
1475 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1476 << *ArgumentValue << "\n");
1477 NewCallArgs.push_back(ArgumentValue);
1478 continue;
1479 }
1480
1481 // If it is a constant, we simply add it to the argument list as a value.
1482 if (Region.AggArgToConstant.contains(AggArgIdx)) {
1483 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1484 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1485 << *CST << "\n");
1486 NewCallArgs.push_back(CST);
1487 continue;
1488 }
1489
1490 // Add a nullptr value if the argument is not found in the extracted
1491 // function. If we cannot find a value, it means it is not in use
1492 // for the region, so we should not pass anything to it.
1493 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1494 NewCallArgs.push_back(ConstantPointerNull::get(
1495 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1496 }
1497
1498 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1499 << *AggFunc << " with new set of arguments\n");
1500 // Create the new call instruction and erase the old one.
1501 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1502 Call);
1503
1504 // It is possible that the call to the outlined function is either the first
1505 // instruction is in the new block, the last instruction, or both. If either
1506 // of these is the case, we need to make sure that we replace the instruction
1507 // in the IRInstructionData struct with the new call.
1508 CallInst *OldCall = Region.Call;
1509 if (Region.NewFront->Inst == OldCall)
1510 Region.NewFront->Inst = Call;
1511 if (Region.NewBack->Inst == OldCall)
1512 Region.NewBack->Inst = Call;
1513
1514 // Transfer any debug information.
1515 Call->setDebugLoc(Region.Call->getDebugLoc());
1516 // Since our output may determine which branch we go to, we make sure to
1517 // propogate this new call value through the module.
1518 OldCall->replaceAllUsesWith(Call);
1519
1520 // Remove the old instruction.
1521 OldCall->eraseFromParent();
1522 Region.Call = Call;
1523
1524 // Make sure that the argument in the new function has the SwiftError
1525 // argument.
1526 if (Group.SwiftErrorArgument)
1527 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1528
1529 return Call;
1530}
1531
1532/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1533/// for \p RetVal.
1534///
1535/// \param Group - The OutlinableGroup containing the information about the
1536/// overall outlined function.
1537/// \param RetVal - The return value or exit option that we are currently
1538/// evaluating.
1539/// \returns The found or newly created BasicBlock to contain the needed
1540/// PHINodes to be used as outputs.
1543 ReturnBlockForRetVal;
1544 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1545 ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1546 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1547 "Could not find output value!");
1548 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1549
1550 // Find if a PHIBlock exists for this return value already. If it is
1551 // the first time we are analyzing this, we will not, so we record it.
1552 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1553 if (PhiBlockForRetVal != Group.PHIBlocks.end())
1554 return PhiBlockForRetVal->second;
1555
1556 // If we did not find a block, we create one, and insert it into the
1557 // overall function and record it.
1558 bool Inserted = false;
1559 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1560 ReturnBB->getParent());
1561 std::tie(PhiBlockForRetVal, Inserted) =
1562 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1563
1564 // We find the predecessors of the return block in the newly created outlined
1565 // function in order to point them to the new PHIBlock rather than the already
1566 // existing return block.
1567 SmallVector<BranchInst *, 2> BranchesToChange;
1568 for (BasicBlock *Pred : predecessors(ReturnBB))
1569 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1570
1571 // Now we mark the branch instructions found, and change the references of the
1572 // return block to the newly created PHIBlock.
1573 for (BranchInst *BI : BranchesToChange)
1574 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1575 if (BI->getSuccessor(Succ) != ReturnBB)
1576 continue;
1577 BI->setSuccessor(Succ, PHIBlock);
1578 }
1579
1580 BranchInst::Create(ReturnBB, PHIBlock);
1581
1582 return PhiBlockForRetVal->second;
1583}
1584
1585/// For the function call now representing the \p Region, find the passed value
1586/// to that call that represents Argument \p A at the call location if the
1587/// call has already been replaced with a call to the overall, aggregate
1588/// function.
1589///
1590/// \param A - The Argument to get the passed value for.
1591/// \param Region - The extracted Region corresponding to the outlined function.
1592/// \returns The Value representing \p A at the call site.
1593static Value *
1595 const OutlinableRegion &Region) {
1596 // If we don't need to adjust the argument number at all (since the call
1597 // has already been replaced by a call to the overall outlined function)
1598 // we can just get the specified argument.
1599 return Region.Call->getArgOperand(A->getArgNo());
1600}
1601
1602/// For the function call now representing the \p Region, find the passed value
1603/// to that call that represents Argument \p A at the call location if the
1604/// call has only been replaced by the call to the aggregate function.
1605///
1606/// \param A - The Argument to get the passed value for.
1607/// \param Region - The extracted Region corresponding to the outlined function.
1608/// \returns The Value representing \p A at the call site.
1609static Value *
1611 const OutlinableRegion &Region) {
1612 unsigned ArgNum = A->getArgNo();
1613
1614 // If it is a constant, we can look at our mapping from when we created
1615 // the outputs to figure out what the constant value is.
1616 if (Region.AggArgToConstant.count(ArgNum))
1617 return Region.AggArgToConstant.find(ArgNum)->second;
1618
1619 // If it is not a constant, and we are not looking at the overall function, we
1620 // need to adjust which argument we are looking at.
1621 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1622 return Region.Call->getArgOperand(ArgNum);
1623}
1624
1625/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1626///
1627/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1628/// \param Region [in] - The OutlinableRegion containing \p PN.
1629/// \param OutputMappings [in] - The mapping of output values from outlined
1630/// region to their original values.
1631/// \param CanonNums [out] - The canonical numbering for the incoming values to
1632/// \p PN paired with their incoming block.
1633/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1634/// of \p Region rather than the overall function's call.
1637 const DenseMap<Value *, Value *> &OutputMappings,
1638 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1639 bool ReplacedWithOutlinedCall = true) {
1640 // Iterate over the incoming values.
1641 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1642 Value *IVal = PN->getIncomingValue(Idx);
1643 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1644 // If we have an argument as incoming value, we need to grab the passed
1645 // value from the call itself.
1646 if (Argument *A = dyn_cast<Argument>(IVal)) {
1647 if (ReplacedWithOutlinedCall)
1649 else
1651 }
1652
1653 // Get the original value if it has been replaced by an output value.
1654 IVal = findOutputMapping(OutputMappings, IVal);
1655
1656 // Find and add the canonical number for the incoming value.
1657 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1658 assert(GVN && "No GVN for incoming value");
1659 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1660 assert(CanonNum && "No Canonical Number for GVN");
1661 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1662 }
1663}
1664
1665/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1666/// in order to condense the number of instructions added to the outlined
1667/// function.
1668///
1669/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1670/// \param Region [in] - The OutlinableRegion containing \p PN.
1671/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1672/// \p PN in.
1673/// \param OutputMappings [in] - The mapping of output values from outlined
1674/// region to their original values.
1675/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1676/// matched.
1677/// \return the newly found or created PHINode in \p OverallPhiBlock.
1678static PHINode*
1680 BasicBlock *OverallPhiBlock,
1681 const DenseMap<Value *, Value *> &OutputMappings,
1682 DenseSet<PHINode *> &UsedPHIs) {
1683 OutlinableGroup &Group = *Region.Parent;
1684
1685
1686 // A list of the canonical numbering assigned to each incoming value, paired
1687 // with the incoming block for the PHINode passed into this function.
1689
1690 // We have to use the extracted function since we have merged this region into
1691 // the overall function yet. We make sure to reassign the argument numbering
1692 // since it is possible that the argument ordering is different between the
1693 // functions.
1694 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1695 /* ReplacedWithOutlinedCall = */ false);
1696
1697 OutlinableRegion *FirstRegion = Group.Regions[0];
1698
1699 // A list of the canonical numbering assigned to each incoming value, paired
1700 // with the incoming block for the PHINode that we are currently comparing
1701 // the passed PHINode to.
1703
1704 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1705 // the uses of the PHINode we are searching for, with the found PHINode.
1706 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1707 // If this PHINode has already been matched to another PHINode to be merged,
1708 // we skip it.
1709 if (UsedPHIs.contains(&CurrPN))
1710 continue;
1711
1712 CurrentCanonNums.clear();
1713 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1714 /* ReplacedWithOutlinedCall = */ true);
1715
1716 // If the list of incoming values is not the same length, then they cannot
1717 // match since there is not an analogue for each incoming value.
1718 if (PNCanonNums.size() != CurrentCanonNums.size())
1719 continue;
1720
1721 bool FoundMatch = true;
1722
1723 // We compare the canonical value for each incoming value in the passed
1724 // in PHINode to one already present in the outlined region. If the
1725 // incoming values do not match, then the PHINodes do not match.
1726
1727 // We also check to make sure that the incoming block matches as well by
1728 // finding the corresponding incoming block in the combined outlined region
1729 // for the current outlined region.
1730 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1731 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1732 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1733 if (ToCompareTo.first != ToAdd.first) {
1734 FoundMatch = false;
1735 break;
1736 }
1737
1738 BasicBlock *CorrespondingBlock =
1739 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1740 assert(CorrespondingBlock && "Found block is nullptr");
1741 if (CorrespondingBlock != ToCompareTo.second) {
1742 FoundMatch = false;
1743 break;
1744 }
1745 }
1746
1747 // If all incoming values and branches matched, then we can merge
1748 // into the found PHINode.
1749 if (FoundMatch) {
1750 UsedPHIs.insert(&CurrPN);
1751 return &CurrPN;
1752 }
1753 }
1754
1755 // If we've made it here, it means we weren't able to replace the PHINode, so
1756 // we must insert it ourselves.
1757 PHINode *NewPN = cast<PHINode>(PN.clone());
1758 NewPN->insertBefore(&*OverallPhiBlock->begin());
1759 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1760 Idx++) {
1761 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1762 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1763
1764 // Find corresponding basic block in the overall function for the incoming
1765 // block.
1766 BasicBlock *BlockToUse =
1767 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1768 NewPN->setIncomingBlock(Idx, BlockToUse);
1769
1770 // If we have an argument we make sure we replace using the argument from
1771 // the correct function.
1772 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1773 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1774 NewPN->setIncomingValue(Idx, Val);
1775 continue;
1776 }
1777
1778 // Find the corresponding value in the overall function.
1779 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1780 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1781 assert(Val && "Value is nullptr?");
1783 FirstRegion->RemappedArguments.find(Val);
1784 if (RemappedIt != FirstRegion->RemappedArguments.end())
1785 Val = RemappedIt->second;
1786 NewPN->setIncomingValue(Idx, Val);
1787 }
1788 return NewPN;
1789}
1790
1791// Within an extracted function, replace the argument uses of the extracted
1792// region with the arguments of the function for an OutlinableGroup.
1793//
1794/// \param [in] Region - The region of extracted code to be changed.
1795/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1796/// region.
1797/// \param [in] FirstFunction - A flag to indicate whether we are using this
1798/// function to define the overall outlined function for all the regions, or
1799/// if we are operating on one of the following regions.
1800static void
1803 const DenseMap<Value *, Value *> &OutputMappings,
1804 bool FirstFunction = false) {
1805 OutlinableGroup &Group = *Region.Parent;
1806 assert(Region.ExtractedFunction && "Region has no extracted function?");
1807
1808 Function *DominatingFunction = Region.ExtractedFunction;
1809 if (FirstFunction)
1810 DominatingFunction = Group.OutlinedFunction;
1811 DominatorTree DT(*DominatingFunction);
1812 DenseSet<PHINode *> UsedPHIs;
1813
1814 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1815 ArgIdx++) {
1816 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1817 "No mapping from extracted to outlined?");
1818 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1819 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1820 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1821 // The argument is an input, so we can simply replace it with the overall
1822 // argument value
1823 if (ArgIdx < Region.NumExtractedInputs) {
1824 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1825 << *Region.ExtractedFunction << " with " << *AggArg
1826 << " in function " << *Group.OutlinedFunction << "\n");
1827 Arg->replaceAllUsesWith(AggArg);
1828 Value *V = Region.Call->getArgOperand(ArgIdx);
1829 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1830 continue;
1831 }
1832
1833 // If we are replacing an output, we place the store value in its own
1834 // block inside the overall function before replacing the use of the output
1835 // in the function.
1836 assert(Arg->hasOneUse() && "Output argument can only have one use");
1837 User *InstAsUser = Arg->user_back();
1838 assert(InstAsUser && "User is nullptr!");
1839
1840 Instruction *I = cast<Instruction>(InstAsUser);
1841 BasicBlock *BB = I->getParent();
1842 SmallVector<BasicBlock *, 4> Descendants;
1843 DT.getDescendants(BB, Descendants);
1844 bool EdgeAdded = false;
1845 if (Descendants.size() == 0) {
1846 EdgeAdded = true;
1847 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1848 DT.getDescendants(BB, Descendants);
1849 }
1850
1851 // Iterate over the following blocks, looking for return instructions,
1852 // if we find one, find the corresponding output block for the return value
1853 // and move our store instruction there.
1854 for (BasicBlock *DescendBB : Descendants) {
1855 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1856 if (!RI)
1857 continue;
1858 Value *RetVal = RI->getReturnValue();
1859 auto VBBIt = OutputBBs.find(RetVal);
1860 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1861
1862 // If this is storing a PHINode, we must make sure it is included in the
1863 // overall function.
1864 StoreInst *SI = cast<StoreInst>(I);
1865
1866 Value *ValueOperand = SI->getValueOperand();
1867
1868 StoreInst *NewI = cast<StoreInst>(I->clone());
1869 NewI->setDebugLoc(DebugLoc());
1870 BasicBlock *OutputBB = VBBIt->second;
1871 NewI->insertInto(OutputBB, OutputBB->end());
1872 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1873 << *OutputBB << "\n");
1874
1875 // If this is storing a PHINode, we must make sure it is included in the
1876 // overall function.
1877 if (!isa<PHINode>(ValueOperand) ||
1878 Region.Candidate->getGVN(ValueOperand).has_value()) {
1879 if (FirstFunction)
1880 continue;
1881 Value *CorrVal =
1882 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1883 assert(CorrVal && "Value is nullptr?");
1884 NewI->setOperand(0, CorrVal);
1885 continue;
1886 }
1887 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1888 // If it has a value, it was not split by the code extractor, which
1889 // is what we are looking for.
1890 if (Region.Candidate->getGVN(PN))
1891 continue;
1892
1893 // We record the parent block for the PHINode in the Region so that
1894 // we can exclude it from checks later on.
1895 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1896
1897 // If this is the first function, we do not need to worry about mergiing
1898 // this with any other block in the overall outlined function, so we can
1899 // just continue.
1900 if (FirstFunction) {
1901 BasicBlock *PHIBlock = PN->getParent();
1902 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1903 continue;
1904 }
1905
1906 // We look for the aggregate block that contains the PHINodes leading into
1907 // this exit path. If we can't find one, we create one.
1908 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1909
1910 // For our PHINode, we find the combined canonical numbering, and
1911 // attempt to find a matching PHINode in the overall PHIBlock. If we
1912 // cannot, we copy the PHINode and move it into this new block.
1913 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1914 OutputMappings, UsedPHIs);
1915 NewI->setOperand(0, NewPN);
1916 }
1917
1918 // If we added an edge for basic blocks without a predecessor, we remove it
1919 // here.
1920 if (EdgeAdded)
1921 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1922 I->eraseFromParent();
1923
1924 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1925 << *Region.ExtractedFunction << " with " << *AggArg
1926 << " in function " << *Group.OutlinedFunction << "\n");
1927 Arg->replaceAllUsesWith(AggArg);
1928 }
1929}
1930
1931/// Within an extracted function, replace the constants that need to be lifted
1932/// into arguments with the actual argument.
1933///
1934/// \param Region [in] - The region of extracted code to be changed.
1936 OutlinableGroup &Group = *Region.Parent;
1937 // Iterate over the constants that need to be elevated into arguments
1938 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1939 unsigned AggArgIdx = Const.first;
1940 Function *OutlinedFunction = Group.OutlinedFunction;
1941 assert(OutlinedFunction && "Overall Function is not defined?");
1942 Constant *CST = Const.second;
1943 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1944 // Identify the argument it will be elevated to, and replace instances of
1945 // that constant in the function.
1946
1947 // TODO: If in the future constants do not have one global value number,
1948 // i.e. a constant 1 could be mapped to several values, this check will
1949 // have to be more strict. It cannot be using only replaceUsesWithIf.
1950
1951 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1952 << " in function " << *OutlinedFunction << " with "
1953 << *Arg << "\n");
1954 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1955 if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1956 return I->getFunction() == OutlinedFunction;
1957 return false;
1958 });
1959 }
1960}
1961
1962/// It is possible that there is a basic block that already performs the same
1963/// stores. This returns a duplicate block, if it exists
1964///
1965/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1966/// \param OutputStoreBBs [in] The existing output blocks.
1967/// \returns an optional value with the number output block if there is a match.
1968std::optional<unsigned> findDuplicateOutputBlock(
1970 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1971
1972 bool Mismatch = false;
1973 unsigned MatchingNum = 0;
1974 // We compare the new set output blocks to the other sets of output blocks.
1975 // If they are the same number, and have identical instructions, they are
1976 // considered to be the same.
1977 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1978 Mismatch = false;
1979 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1981 OutputBBs.find(VToB.first);
1982 if (OutputBBIt == OutputBBs.end()) {
1983 Mismatch = true;
1984 break;
1985 }
1986
1987 BasicBlock *CompBB = VToB.second;
1988 BasicBlock *OutputBB = OutputBBIt->second;
1989 if (CompBB->size() - 1 != OutputBB->size()) {
1990 Mismatch = true;
1991 break;
1992 }
1993
1994 BasicBlock::iterator NIt = OutputBB->begin();
1995 for (Instruction &I : *CompBB) {
1996 if (isa<BranchInst>(&I))
1997 continue;
1998
1999 if (!I.isIdenticalTo(&(*NIt))) {
2000 Mismatch = true;
2001 break;
2002 }
2003
2004 NIt++;
2005 }
2006 }
2007
2008 if (!Mismatch)
2009 return MatchingNum;
2010
2011 MatchingNum++;
2012 }
2013
2014 return std::nullopt;
2015}
2016
2017/// Remove empty output blocks from the outlined region.
2018///
2019/// \param BlocksToPrune - Mapping of return values output blocks for the \p
2020/// Region.
2021/// \param Region - The OutlinableRegion we are analyzing.
2022static bool
2025 bool AllRemoved = true;
2026 Value *RetValueForBB;
2027 BasicBlock *NewBB;
2029 // Iterate over the output blocks created in the outlined section.
2030 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2031 RetValueForBB = VtoBB.first;
2032 NewBB = VtoBB.second;
2033
2034 // If there are no instructions, we remove it from the module, and also
2035 // mark the value for removal from the return value to output block mapping.
2036 if (NewBB->size() == 0) {
2037 NewBB->eraseFromParent();
2038 ToRemove.push_back(RetValueForBB);
2039 continue;
2040 }
2041
2042 // Mark that we could not remove all the blocks since they were not all
2043 // empty.
2044 AllRemoved = false;
2045 }
2046
2047 // Remove the return value from the mapping.
2048 for (Value *V : ToRemove)
2049 BlocksToPrune.erase(V);
2050
2051 // Mark the region as having the no output scheme.
2052 if (AllRemoved)
2053 Region.OutputBlockNum = -1;
2054
2055 return AllRemoved;
2056}
2057
2058/// For the outlined section, move needed the StoreInsts for the output
2059/// registers into their own block. Then, determine if there is a duplicate
2060/// output block already created.
2061///
2062/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2063/// \param [in] Region - The OutlinableRegion that is being analyzed.
2064/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2065/// placed in.
2066/// \param [in] EndBBs - the final blocks of the extracted function.
2067/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2068/// been replaced by a new output value.
2069/// \param [in,out] OutputStoreBBs - The existing output blocks.
2074 const DenseMap<Value *, Value *> &OutputMappings,
2075 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2076 // If none of the output blocks have any instructions, this means that we do
2077 // not have to determine if it matches any of the other output schemes, and we
2078 // don't have to do anything else.
2079 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2080 return;
2081
2082 // Determine is there is a duplicate set of blocks.
2083 std::optional<unsigned> MatchingBB =
2084 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2085
2086 // If there is, we remove the new output blocks. If it does not,
2087 // we add it to our list of sets of output blocks.
2088 if (MatchingBB) {
2089 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2090 << Region.ExtractedFunction << " to " << *MatchingBB);
2091
2092 Region.OutputBlockNum = *MatchingBB;
2093 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2094 VtoBB.second->eraseFromParent();
2095 return;
2096 }
2097
2098 Region.OutputBlockNum = OutputStoreBBs.size();
2099
2100 Value *RetValueForBB;
2101 BasicBlock *NewBB;
2102 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2103 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2104 RetValueForBB = VtoBB.first;
2105 NewBB = VtoBB.second;
2107 EndBBs.find(RetValueForBB);
2108 LLVM_DEBUG(dbgs() << "Create output block for region in"
2109 << Region.ExtractedFunction << " to "
2110 << *NewBB);
2111 BranchInst::Create(VBBIt->second, NewBB);
2112 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2113 }
2114}
2115
2116/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2117/// before creating a basic block for each \p NewMap, and inserting into the new
2118/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2119///
2120/// \param OldMap [in] - The mapping to base the new mapping off of.
2121/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2122/// \param ParentFunc [in] - The function to put the new basic block in.
2123/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2124/// by an index value.
2127 Function *ParentFunc, Twine BaseName) {
2128 unsigned Idx = 0;
2129 std::vector<Value *> SortedKeys;
2130
2131 getSortedConstantKeys(SortedKeys, OldMap);
2132
2133 for (Value *RetVal : SortedKeys) {
2135 ParentFunc->getContext(),
2136 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2137 ParentFunc);
2138 NewMap.insert(std::make_pair(RetVal, NewBB));
2139 }
2140}
2141
2142/// Create the switch statement for outlined function to differentiate between
2143/// all the output blocks.
2144///
2145/// For the outlined section, determine if an outlined block already exists that
2146/// matches the needed stores for the extracted section.
2147/// \param [in] M - The module we are outlining from.
2148/// \param [in] OG - The group of regions to be outlined.
2149/// \param [in] EndBBs - The final blocks of the extracted function.
2150/// \param [in,out] OutputStoreBBs - The existing output blocks.
2153 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2154 // We only need the switch statement if there is more than one store
2155 // combination, or there is more than one set of output blocks. The first
2156 // will occur when we store different sets of values for two different
2157 // regions. The second will occur when we have two outputs that are combined
2158 // in a PHINode outside of the region in one outlined instance, and are used
2159 // seaparately in another. This will create the same set of OutputGVNs, but
2160 // will generate two different output schemes.
2161 if (OG.OutputGVNCombinations.size() > 1) {
2162 Function *AggFunc = OG.OutlinedFunction;
2163 // Create a final block for each different return block.
2165 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2166
2167 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2168 std::pair<Value *, BasicBlock *> &OutputBlock =
2169 *OG.EndBBs.find(RetBlockPair.first);
2170 BasicBlock *ReturnBlock = RetBlockPair.second;
2171 BasicBlock *EndBB = OutputBlock.second;
2172 Instruction *Term = EndBB->getTerminator();
2173 // Move the return value to the final block instead of the original exit
2174 // stub.
2175 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2176 // Put the switch statement in the old end basic block for the function
2177 // with a fall through to the new return block.
2178 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2179 << OutputStoreBBs.size() << "\n");
2180 SwitchInst *SwitchI =
2181 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2182 ReturnBlock, OutputStoreBBs.size(), EndBB);
2183
2184 unsigned Idx = 0;
2185 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2187 OutputStoreBB.find(OutputBlock.first);
2188
2189 if (OSBBIt == OutputStoreBB.end())
2190 continue;
2191
2192 BasicBlock *BB = OSBBIt->second;
2193 SwitchI->addCase(
2194 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2195 Term = BB->getTerminator();
2196 Term->setSuccessor(0, ReturnBlock);
2197 Idx++;
2198 }
2199 }
2200 return;
2201 }
2202
2203 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2204
2205 // If there needs to be stores, move them from the output blocks to their
2206 // corresponding ending block. We do not check that the OutputGVNCombinations
2207 // is equal to 1 here since that could just been the case where there are 0
2208 // outputs. Instead, we check whether there is more than one set of output
2209 // blocks since this is the only case where we would have to move the
2210 // stores, and erase the extraneous blocks.
2211 if (OutputStoreBBs.size() == 1) {
2212 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2213 << *OG.OutlinedFunction << "\n");
2214 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2215 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2217 EndBBs.find(VBPair.first);
2218 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2219 BasicBlock *EndBB = EndBBIt->second;
2220 BasicBlock *OutputBB = VBPair.second;
2221 Instruction *Term = OutputBB->getTerminator();
2222 Term->eraseFromParent();
2223 Term = EndBB->getTerminator();
2224 moveBBContents(*OutputBB, *EndBB);
2225 Term->moveBefore(*EndBB, EndBB->end());
2226 OutputBB->eraseFromParent();
2227 }
2228 }
2229}
2230
2231/// Fill the new function that will serve as the replacement function for all of
2232/// the extracted regions of a certain structure from the first region in the
2233/// list of regions. Replace this first region's extracted function with the
2234/// new overall function.
2235///
2236/// \param [in] M - The module we are outlining from.
2237/// \param [in] CurrentGroup - The group of regions to be outlined.
2238/// \param [in,out] OutputStoreBBs - The output blocks for each different
2239/// set of stores needed for the different functions.
2240/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2241/// once outlining is complete.
2242/// \param [in] OutputMappings - Extracted functions to erase from module
2243/// once outlining is complete.
2245 Module &M, OutlinableGroup &CurrentGroup,
2246 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2247 std::vector<Function *> &FuncsToRemove,
2248 const DenseMap<Value *, Value *> &OutputMappings) {
2249 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2250
2251 // Move first extracted function's instructions into new function.
2252 LLVM_DEBUG(dbgs() << "Move instructions from "
2253 << *CurrentOS->ExtractedFunction << " to instruction "
2254 << *CurrentGroup.OutlinedFunction << "\n");
2256 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2257
2258 // Transfer the attributes from the function to the new function.
2259 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2260 CurrentGroup.OutlinedFunction->addFnAttr(A);
2261
2262 // Create a new set of output blocks for the first extracted function.
2264 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2265 CurrentGroup.OutlinedFunction, "output_block_0");
2266 CurrentOS->OutputBlockNum = 0;
2267
2268 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2269 replaceConstants(*CurrentOS);
2270
2271 // We first identify if any output blocks are empty, if they are we remove
2272 // them. We then create a branch instruction to the basic block to the return
2273 // block for the function for each non empty output block.
2274 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2275 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2276 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2278 CurrentGroup.EndBBs.find(VToBB.first);
2279 BasicBlock *EndBB = VBBIt->second;
2280 BranchInst::Create(EndBB, VToBB.second);
2281 OutputStoreBBs.back().insert(VToBB);
2282 }
2283 }
2284
2285 // Replace the call to the extracted function with the outlined function.
2286 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2287
2288 // We only delete the extracted functions at the end since we may need to
2289 // reference instructions contained in them for mapping purposes.
2290 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2291}
2292
2293void IROutliner::deduplicateExtractedSections(
2294 Module &M, OutlinableGroup &CurrentGroup,
2295 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2296 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2297
2298 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2299
2300 OutlinableRegion *CurrentOS;
2301
2302 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2303 OutputMappings);
2304
2305 std::vector<Value *> SortedKeys;
2306 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2307 CurrentOS = CurrentGroup.Regions[Idx];
2309 *CurrentOS->ExtractedFunction);
2310
2311 // Create a set of BasicBlocks, one for each return block, to hold the
2312 // needed store instructions.
2315 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2316 "output_block_" + Twine(static_cast<unsigned>(Idx)));
2317 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2318 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2319 CurrentGroup.EndBBs, OutputMappings,
2320 OutputStoreBBs);
2321
2322 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2323 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2324 }
2325
2326 // Create a switch statement to handle the different output schemes.
2327 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2328
2329 OutlinedFunctionNum++;
2330}
2331
2332/// Checks that the next instruction in the InstructionDataList matches the
2333/// next instruction in the module. If they do not, there could be the
2334/// possibility that extra code has been inserted, and we must ignore it.
2335///
2336/// \param ID - The IRInstructionData to check the next instruction of.
2337/// \returns true if the InstructionDataList and actual instruction match.
2339 // We check if there is a discrepancy between the InstructionDataList
2340 // and the actual next instruction in the module. If there is, it means
2341 // that an extra instruction was added, likely by the CodeExtractor.
2342
2343 // Since we do not have any similarity data about this particular
2344 // instruction, we cannot confidently outline it, and must discard this
2345 // candidate.
2346 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2347 Instruction *NextIDLInst = NextIDIt->Inst;
2348 Instruction *NextModuleInst = nullptr;
2349 if (!ID.Inst->isTerminator())
2350 NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2351 else if (NextIDLInst != nullptr)
2352 NextModuleInst =
2353 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2354
2355 if (NextIDLInst && NextIDLInst != NextModuleInst)
2356 return false;
2357
2358 return true;
2359}
2360
2361bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2362 const OutlinableRegion &Region) {
2363 IRSimilarityCandidate *IRSC = Region.Candidate;
2364 unsigned StartIdx = IRSC->getStartIdx();
2365 unsigned EndIdx = IRSC->getEndIdx();
2366
2367 // A check to make sure that we are not about to attempt to outline something
2368 // that has already been outlined.
2369 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2370 if (Outlined.contains(Idx))
2371 return false;
2372
2373 // We check if the recorded instruction matches the actual next instruction,
2374 // if it does not, we fix it in the InstructionDataList.
2375 if (!Region.Candidate->backInstruction()->isTerminator()) {
2376 Instruction *NewEndInst =
2377 Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2378 assert(NewEndInst && "Next instruction is a nullptr?");
2379 if (Region.Candidate->end()->Inst != NewEndInst) {
2380 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2381 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2382 IRInstructionData(*NewEndInst,
2383 InstructionClassifier.visit(*NewEndInst), *IDL);
2384
2385 // Insert the first IRInstructionData of the new region after the
2386 // last IRInstructionData of the IRSimilarityCandidate.
2387 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2388 }
2389 }
2390
2391 return none_of(*IRSC, [this](IRInstructionData &ID) {
2393 return true;
2394
2395 return !this->InstructionClassifier.visit(ID.Inst);
2396 });
2397}
2398
2399void IROutliner::pruneIncompatibleRegions(
2400 std::vector<IRSimilarityCandidate> &CandidateVec,
2401 OutlinableGroup &CurrentGroup) {
2402 bool PreviouslyOutlined;
2403
2404 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2405 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2406 const IRSimilarityCandidate &RHS) {
2407 return LHS.getStartIdx() < RHS.getStartIdx();
2408 });
2409
2410 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2411 // Since outlining a call and a branch instruction will be the same as only
2412 // outlinining a call instruction, we ignore it as a space saving.
2413 if (FirstCandidate.getLength() == 2) {
2414 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2415 isa<BranchInst>(FirstCandidate.back()->Inst))
2416 return;
2417 }
2418
2419 unsigned CurrentEndIdx = 0;
2420 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2421 PreviouslyOutlined = false;
2422 unsigned StartIdx = IRSC.getStartIdx();
2423 unsigned EndIdx = IRSC.getEndIdx();
2424 const Function &FnForCurrCand = *IRSC.getFunction();
2425
2426 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2427 if (Outlined.contains(Idx)) {
2428 PreviouslyOutlined = true;
2429 break;
2430 }
2431
2432 if (PreviouslyOutlined)
2433 continue;
2434
2435 // Check over the instructions, and if the basic block has its address
2436 // taken for use somewhere else, we do not outline that block.
2437 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2438 return ID.Inst->getParent()->hasAddressTaken();
2439 });
2440
2441 if (BBHasAddressTaken)
2442 continue;
2443
2444 if (FnForCurrCand.hasOptNone())
2445 continue;
2446
2447 if (FnForCurrCand.hasFnAttribute("nooutline")) {
2448 LLVM_DEBUG({
2449 dbgs() << "... Skipping function with nooutline attribute: "
2450 << FnForCurrCand.getName() << "\n";
2451 });
2452 continue;
2453 }
2454
2455 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2456 !OutlineFromLinkODRs)
2457 continue;
2458
2459 // Greedily prune out any regions that will overlap with already chosen
2460 // regions.
2461 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2462 continue;
2463
2464 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2466 return true;
2467
2468 return !this->InstructionClassifier.visit(ID.Inst);
2469 });
2470
2471 if (BadInst)
2472 continue;
2473
2474 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2475 OutlinableRegion(IRSC, CurrentGroup);
2476 CurrentGroup.Regions.push_back(OS);
2477
2478 CurrentEndIdx = EndIdx;
2479 }
2480}
2481
2483IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2484 InstructionCost RegionBenefit = 0;
2485 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2486 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2487 // We add the number of instructions in the region to the benefit as an
2488 // estimate as to how much will be removed.
2489 RegionBenefit += Region->getBenefit(TTI);
2490 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2491 << " saved instructions to overfall benefit.\n");
2492 }
2493
2494 return RegionBenefit;
2495}
2496
2497/// For the \p OutputCanon number passed in find the value represented by this
2498/// canonical number. If it is from a PHINode, we pick the first incoming
2499/// value and return that Value instead.
2500///
2501/// \param Region - The OutlinableRegion to get the Value from.
2502/// \param OutputCanon - The canonical number to find the Value from.
2503/// \returns The Value represented by a canonical number \p OutputCanon in \p
2504/// Region.
2506 unsigned OutputCanon) {
2507 OutlinableGroup &CurrentGroup = *Region.Parent;
2508 // If the value is greater than the value in the tracker, we have a
2509 // PHINode and will instead use one of the incoming values to find the
2510 // type.
2511 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2512 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2513 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2514 "Could not find GVN set for PHINode number!");
2515 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2516 OutputCanon = *It->second.second.begin();
2517 }
2518 std::optional<unsigned> OGVN =
2519 Region.Candidate->fromCanonicalNum(OutputCanon);
2520 assert(OGVN && "Could not find GVN for Canonical Number?");
2521 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2522 assert(OV && "Could not find value for GVN?");
2523 return *OV;
2524}
2525
2527IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2528 InstructionCost OverallCost = 0;
2529 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2530 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2531
2532 // Each output incurs a load after the call, so we add that to the cost.
2533 for (unsigned OutputCanon : Region->GVNStores) {
2534 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2535 InstructionCost LoadCost =
2536 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2538
2539 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2540 << " instructions to cost for output of type "
2541 << *V->getType() << "\n");
2542 OverallCost += LoadCost;
2543 }
2544 }
2545
2546 return OverallCost;
2547}
2548
2549/// Find the extra instructions needed to handle any output values for the
2550/// region.
2551///
2552/// \param [in] M - The Module to outline from.
2553/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2554/// \param [in] TTI - The TargetTransformInfo used to collect information for
2555/// new instruction costs.
2556/// \returns the additional cost to handle the outputs.
2558 OutlinableGroup &CurrentGroup,
2560 InstructionCost OutputCost = 0;
2561 unsigned NumOutputBranches = 0;
2562
2563 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2564 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2565 DenseSet<BasicBlock *> CandidateBlocks;
2566 Candidate.getBasicBlocks(CandidateBlocks);
2567
2568 // Count the number of different output branches that point to blocks outside
2569 // of the region.
2570 DenseSet<BasicBlock *> FoundBlocks;
2571 for (IRInstructionData &ID : Candidate) {
2572 if (!isa<BranchInst>(ID.Inst))
2573 continue;
2574
2575 for (Value *V : ID.OperVals) {
2576 BasicBlock *BB = static_cast<BasicBlock *>(V);
2577 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2578 NumOutputBranches++;
2579 }
2580 }
2581
2582 CurrentGroup.BranchesToOutside = NumOutputBranches;
2583
2584 for (const ArrayRef<unsigned> &OutputUse :
2585 CurrentGroup.OutputGVNCombinations) {
2586 for (unsigned OutputCanon : OutputUse) {
2587 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2588 InstructionCost StoreCost =
2589 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2591
2592 // An instruction cost is added for each store set that needs to occur for
2593 // various output combinations inside the function, plus a branch to
2594 // return to the exit block.
2595 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2596 << " instructions to cost for output of type "
2597 << *V->getType() << "\n");
2598 OutputCost += StoreCost * NumOutputBranches;
2599 }
2600
2601 InstructionCost BranchCost =
2603 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2604 << " a branch instruction\n");
2605 OutputCost += BranchCost * NumOutputBranches;
2606 }
2607
2608 // If there is more than one output scheme, we must have a comparison and
2609 // branch for each different item in the switch statement.
2610 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2611 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2612 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2615 InstructionCost BranchCost =
2617
2618 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2619 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2620
2621 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2622 << " instructions for each switch case for each different"
2623 << " output path in a function\n");
2624 OutputCost += TotalCost * NumOutputBranches;
2625 }
2626
2627 return OutputCost;
2628}
2629
2630void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2631 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2632 CurrentGroup.Benefit += RegionBenefit;
2633 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2634
2635 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2636 CurrentGroup.Cost += OutputReloadCost;
2637 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2638
2639 InstructionCost AverageRegionBenefit =
2640 RegionBenefit / CurrentGroup.Regions.size();
2641 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2642 unsigned NumRegions = CurrentGroup.Regions.size();
2644 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2645
2646 // We add one region to the cost once, to account for the instructions added
2647 // inside of the newly created function.
2648 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2649 << " instructions to cost for body of new function.\n");
2650 CurrentGroup.Cost += AverageRegionBenefit;
2651 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2652
2653 // For each argument, we must add an instruction for loading the argument
2654 // out of the register and into a value inside of the newly outlined function.
2655 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2656 << " instructions to cost for each argument in the new"
2657 << " function.\n");
2658 CurrentGroup.Cost +=
2659 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2660 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2661
2662 // Each argument needs to either be loaded into a register or onto the stack.
2663 // Some arguments will only be loaded into the stack once the argument
2664 // registers are filled.
2665 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2666 << " instructions to cost for each argument in the new"
2667 << " function " << NumRegions << " times for the "
2668 << "needed argument handling at the call site.\n");
2669 CurrentGroup.Cost +=
2670 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2671 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2672
2673 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2674 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2675}
2676
2677void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2678 ArrayRef<Value *> Outputs,
2679 LoadInst *LI) {
2680 // For and load instructions following the call
2681 Value *Operand = LI->getPointerOperand();
2682 std::optional<unsigned> OutputIdx;
2683 // Find if the operand it is an output register.
2684 for (unsigned ArgIdx = Region.NumExtractedInputs;
2685 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2686 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2687 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2688 break;
2689 }
2690 }
2691
2692 // If we found an output register, place a mapping of the new value
2693 // to the original in the mapping.
2694 if (!OutputIdx)
2695 return;
2696
2697 if (!OutputMappings.contains(Outputs[*OutputIdx])) {
2698 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2699 << *Outputs[*OutputIdx] << "\n");
2700 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2701 } else {
2702 Value *Orig = OutputMappings.find(Outputs[*OutputIdx])->second;
2703 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2704 << *Outputs[*OutputIdx] << "\n");
2705 OutputMappings.insert(std::make_pair(LI, Orig));
2706 }
2707}
2708
2709bool IROutliner::extractSection(OutlinableRegion &Region) {
2710 SetVector<Value *> ArgInputs, Outputs, SinkCands;
2711 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2712 BasicBlock *InitialStart = Region.StartBB;
2713 Function *OrigF = Region.StartBB->getParent();
2714 CodeExtractorAnalysisCache CEAC(*OrigF);
2715 Region.ExtractedFunction =
2716 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2717
2718 // If the extraction was successful, find the BasicBlock, and reassign the
2719 // OutlinableRegion blocks
2720 if (!Region.ExtractedFunction) {
2721 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2722 << "\n");
2723 Region.reattachCandidate();
2724 return false;
2725 }
2726
2727 // Get the block containing the called branch, and reassign the blocks as
2728 // necessary. If the original block still exists, it is because we ended on
2729 // a branch instruction, and so we move the contents into the block before
2730 // and assign the previous block correctly.
2731 User *InstAsUser = Region.ExtractedFunction->user_back();
2732 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2733 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2734 assert(Region.PrevBB && "PrevBB is nullptr?");
2735 if (Region.PrevBB == InitialStart) {
2736 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2737 Instruction *BI = NewPrev->getTerminator();
2738 BI->eraseFromParent();
2739 moveBBContents(*InitialStart, *NewPrev);
2740 Region.PrevBB = NewPrev;
2741 InitialStart->eraseFromParent();
2742 }
2743
2744 Region.StartBB = RewrittenBB;
2745 Region.EndBB = RewrittenBB;
2746
2747 // The sequences of outlinable regions has now changed. We must fix the
2748 // IRInstructionDataList for consistency. Although they may not be illegal
2749 // instructions, they should not be compared with anything else as they
2750 // should not be outlined in this round. So marking these as illegal is
2751 // allowed.
2752 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2753 Instruction *BeginRewritten = &*RewrittenBB->begin();
2754 Instruction *EndRewritten = &*RewrittenBB->begin();
2755 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2756 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2757 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2758 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2759
2760 // Insert the first IRInstructionData of the new region in front of the
2761 // first IRInstructionData of the IRSimilarityCandidate.
2762 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2763 // Insert the first IRInstructionData of the new region after the
2764 // last IRInstructionData of the IRSimilarityCandidate.
2765 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2766 // Remove the IRInstructionData from the IRSimilarityCandidate.
2767 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2768
2769 assert(RewrittenBB != nullptr &&
2770 "Could not find a predecessor after extraction!");
2771
2772 // Iterate over the new set of instructions to find the new call
2773 // instruction.
2774 for (Instruction &I : *RewrittenBB)
2775 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2776 if (Region.ExtractedFunction == CI->getCalledFunction())
2777 Region.Call = CI;
2778 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2779 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2780 Region.reattachCandidate();
2781 return true;
2782}
2783
2784unsigned IROutliner::doOutline(Module &M) {
2785 // Find the possible similarity sections.
2786 InstructionClassifier.EnableBranches = !DisableBranches;
2787 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2788 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2789
2790 IRSimilarityIdentifier &Identifier = getIRSI(M);
2791 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2792
2793 // Sort them by size of extracted sections
2794 unsigned OutlinedFunctionNum = 0;
2795 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2796 // to sort them by the potential number of instructions to be outlined
2797 if (SimilarityCandidates.size() > 1)
2798 llvm::stable_sort(SimilarityCandidates,
2799 [](const std::vector<IRSimilarityCandidate> &LHS,
2800 const std::vector<IRSimilarityCandidate> &RHS) {
2801 return LHS[0].getLength() * LHS.size() >
2802 RHS[0].getLength() * RHS.size();
2803 });
2804 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2805 // each of the following for loops to avoid making an allocator.
2806 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2807
2808 DenseSet<unsigned> NotSame;
2809 std::vector<OutlinableGroup *> NegativeCostGroups;
2810 std::vector<OutlinableRegion *> OutlinedRegions;
2811 // Iterate over the possible sets of similarity.
2812 unsigned PotentialGroupIdx = 0;
2813 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2814 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2815
2816 // Remove entries that were previously outlined
2817 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2818
2819 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2820 // trying to outlined since there is no compatible similar instance of this
2821 // code.
2822 if (CurrentGroup.Regions.size() < 2)
2823 continue;
2824
2825 // Determine if there are any values that are the same constant throughout
2826 // each section in the set.
2827 NotSame.clear();
2828 CurrentGroup.findSameConstants(NotSame);
2829
2830 if (CurrentGroup.IgnoreGroup)
2831 continue;
2832
2833 // Create a CodeExtractor for each outlinable region. Identify inputs and
2834 // outputs for each section using the code extractor and create the argument
2835 // types for the Aggregate Outlining Function.
2836 OutlinedRegions.clear();
2837 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2838 // Break the outlinable region out of its parent BasicBlock into its own
2839 // BasicBlocks (see function implementation).
2840 OS->splitCandidate();
2841
2842 // There's a chance that when the region is split, extra instructions are
2843 // added to the region. This makes the region no longer viable
2844 // to be split, so we ignore it for outlining.
2845 if (!OS->CandidateSplit)
2846 continue;
2847
2849 DenseSet<BasicBlock *> BlocksInRegion;
2850 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2851 OS->CE = new (ExtractorAllocator.Allocate())
2852 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2853 false, nullptr, "outlined");
2854 findAddInputsOutputs(M, *OS, NotSame);
2855 if (!OS->IgnoreRegion)
2856 OutlinedRegions.push_back(OS);
2857
2858 // We recombine the blocks together now that we have gathered all the
2859 // needed information.
2860 OS->reattachCandidate();
2861 }
2862
2863 CurrentGroup.Regions = std::move(OutlinedRegions);
2864
2865 if (CurrentGroup.Regions.empty())
2866 continue;
2867
2868 CurrentGroup.collectGVNStoreSets(M);
2869
2870 if (CostModel)
2871 findCostBenefit(M, CurrentGroup);
2872
2873 // If we are adhering to the cost model, skip those groups where the cost
2874 // outweighs the benefits.
2875 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2877 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2878 ORE.emit([&]() {
2879 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2880 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2881 C->frontInstruction());
2882 R << "did not outline "
2883 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2884 << " regions due to estimated increase of "
2885 << ore::NV("InstructionIncrease",
2886 CurrentGroup.Cost - CurrentGroup.Benefit)
2887 << " instructions at locations ";
2888 interleave(
2889 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2890 [&R](OutlinableRegion *Region) {
2891 R << ore::NV(
2892 "DebugLoc",
2893 Region->Candidate->frontInstruction()->getDebugLoc());
2894 },
2895 [&R]() { R << " "; });
2896 return R;
2897 });
2898 continue;
2899 }
2900
2901 NegativeCostGroups.push_back(&CurrentGroup);
2902 }
2903
2904 ExtractorAllocator.DestroyAll();
2905
2906 if (NegativeCostGroups.size() > 1)
2907 stable_sort(NegativeCostGroups,
2908 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2909 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2910 });
2911
2912 std::vector<Function *> FuncsToRemove;
2913 for (OutlinableGroup *CG : NegativeCostGroups) {
2914 OutlinableGroup &CurrentGroup = *CG;
2915
2916 OutlinedRegions.clear();
2917 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2918 // We check whether our region is compatible with what has already been
2919 // outlined, and whether we need to ignore this item.
2920 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2921 continue;
2922 OutlinedRegions.push_back(Region);
2923 }
2924
2925 if (OutlinedRegions.size() < 2)
2926 continue;
2927
2928 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2929 // we are still outlining enough regions to make up for the added cost.
2930 CurrentGroup.Regions = std::move(OutlinedRegions);
2931 if (CostModel) {
2932 CurrentGroup.Benefit = 0;
2933 CurrentGroup.Cost = 0;
2934 findCostBenefit(M, CurrentGroup);
2935 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2936 continue;
2937 }
2938 OutlinedRegions.clear();
2939 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2940 Region->splitCandidate();
2941 if (!Region->CandidateSplit)
2942 continue;
2943 OutlinedRegions.push_back(Region);
2944 }
2945
2946 CurrentGroup.Regions = std::move(OutlinedRegions);
2947 if (CurrentGroup.Regions.size() < 2) {
2948 for (OutlinableRegion *R : CurrentGroup.Regions)
2949 R->reattachCandidate();
2950 continue;
2951 }
2952
2953 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2954 << " and benefit " << CurrentGroup.Benefit << "\n");
2955
2956 // Create functions out of all the sections, and mark them as outlined.
2957 OutlinedRegions.clear();
2958 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2960 DenseSet<BasicBlock *> BlocksInRegion;
2961 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2962 OS->CE = new (ExtractorAllocator.Allocate())
2963 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2964 false, nullptr, "outlined");
2965 bool FunctionOutlined = extractSection(*OS);
2966 if (FunctionOutlined) {
2967 unsigned StartIdx = OS->Candidate->getStartIdx();
2968 unsigned EndIdx = OS->Candidate->getEndIdx();
2969 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2970 Outlined.insert(Idx);
2971
2972 OutlinedRegions.push_back(OS);
2973 }
2974 }
2975
2976 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2977 << " with benefit " << CurrentGroup.Benefit
2978 << " and cost " << CurrentGroup.Cost << "\n");
2979
2980 CurrentGroup.Regions = std::move(OutlinedRegions);
2981
2982 if (CurrentGroup.Regions.empty())
2983 continue;
2984
2986 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2987 ORE.emit([&]() {
2988 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2989 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2990 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2991 << " regions with decrease of "
2992 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2993 << " instructions at locations ";
2994 interleave(
2995 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2996 [&R](OutlinableRegion *Region) {
2997 R << ore::NV("DebugLoc",
2998 Region->Candidate->frontInstruction()->getDebugLoc());
2999 },
3000 [&R]() { R << " "; });
3001 return R;
3002 });
3003
3004 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
3005 OutlinedFunctionNum);
3006 }
3007
3008 for (Function *F : FuncsToRemove)
3009 F->eraseFromParent();
3010
3011 return OutlinedFunctionNum;
3012}
3013
3015 CostModel = !NoCostModel;
3016 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
3017
3018 return doOutline(M) > 0;
3019}
3020
3022 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3023
3024 std::function<TargetTransformInfo &(Function &)> GTTI =
3025 [&FAM](Function &F) -> TargetTransformInfo & {
3027 };
3028
3029 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3030 [&AM](Module &M) -> IRSimilarityIdentifier & {
3031 return AM.getResult<IRSimilarityAnalysis>(M);
3032 };
3033
3034 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3035 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3036 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3037 ORE.reset(new OptimizationRemarkEmitter(&F));
3038 return *ORE;
3039 };
3040
3041 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3042 return PreservedAnalyses::none();
3043 return PreservedAnalyses::all();
3044}
ReachingDefAnalysis InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1272
bool End
Definition: ELF_riscv.cpp:469
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:496
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:833
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:156
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:709
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:863
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs, std::vector< Function * > &FuncsToRemove, const DenseMap< Value *, Value * > &OutputMappings)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:550
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:166
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
Definition: IROutliner.cpp:221
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:934
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:808
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:621
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
Definition: IROutliner.cpp:531
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:464
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
Definition: IROutliner.cpp:778
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
AttributeSet getFnAttrs() const
The function attributes are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:511
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:393
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:103
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:318
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:414
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:334
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:296
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:132
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
size_t size() const
Definition: BasicBlock.h:345
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:322
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:247
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1691
This is an important base class in LLVM.
Definition: Constant.h:41
Debug location.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Class to represent function types.
Definition: DerivedTypes.h:103
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:555
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:138
const BasicBlock & getEntryBlock() const
Definition: Function.h:747
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:176
const BasicBlock & back() const
Definition: Function.h:772
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:315
size_t size() const
Definition: Function.h:768
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:641
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:583
size_t arg_size() const
Definition: Function.h:811
Argument * getArg(unsigned i) const
Definition: Function.h:796
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:645
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:514
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const BasicBlock * getParent() const
Definition: Instruction.h:90
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
bool isTerminator() const
Definition: Instruction.h:198
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Definition: Instruction.cpp:99
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:389
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:119
Root of the metadata hierarchy.
Definition: Metadata.h:61
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
iterator begin()
Definition: RegionInfo.h:560
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
iterator end()
Definition: RegionInfo.h:561
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition: SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition: SetVector.h:84
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
User * user_back()
Definition: Value.h:407
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:543
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1090
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
bool erase(const ValueT &V)
Definition: DenseSet.h:101
An opaque object representing a hash code.
Definition: Hashing.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:194
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:157
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1971
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:128
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:2083
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:42
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:38
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:46
@ Other
Any other memory.
auto predecessors(const MachineBasicBlock *BB)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:613
void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
Definition: DebugInfo.cpp:365
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:491
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:72
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:135
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:78
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:98
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:599
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:131
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
Definition: IROutliner.cpp:123
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
Definition: IROutliner.cpp:119
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:74
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
Definition: IROutliner.cpp:124
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:86
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:89
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:114
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:82
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:102
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:93
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:80
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:110
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:606
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:128
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:106
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:487
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:249
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:187
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:199
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:376