LLVM 20.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
27#include <optional>
28#include <vector>
29
30#define DEBUG_TYPE "iroutliner"
31
32using namespace llvm;
33using namespace IRSimilarity;
34
35// A command flag to be used for debugging to exclude branches from similarity
36// matching and outlining.
37namespace llvm {
39
40// A command flag to be used for debugging to indirect calls from similarity
41// matching and outlining.
43
44// A command flag to be used for debugging to exclude intrinsics from similarity
45// matching and outlining.
47
48} // namespace llvm
49
50// Set to true if the user wants the ir outliner to run on linkonceodr linkage
51// functions. This is false by default because the linker can dedupe linkonceodr
52// functions. Since the outliner is confined to a single module (modulo LTO),
53// this is off by default. It should, however, be the default behavior in
54// LTO.
56 "enable-linkonceodr-ir-outlining", cl::Hidden,
57 cl::desc("Enable the IR outliner on linkonceodr functions"),
58 cl::init(false));
59
60// This is a debug option to test small pieces of code to ensure that outlining
61// works correctly.
63 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
64 cl::desc("Debug option to outline greedily, without restriction that "
65 "calculated benefit outweighs cost"));
66
67/// The OutlinableGroup holds all the overarching information for outlining
68/// a set of regions that are structurally similar to one another, such as the
69/// types of the overall function, the output blocks, the sets of stores needed
70/// and a list of the different regions. This information is used in the
71/// deduplication of extracted regions with the same structure.
73 /// The sections that could be outlined
74 std::vector<OutlinableRegion *> Regions;
75
76 /// The argument types for the function created as the overall function to
77 /// replace the extracted function for each region.
78 std::vector<Type *> ArgumentTypes;
79 /// The FunctionType for the overall function.
81 /// The Function for the collective overall function.
83
84 /// Flag for whether we should not consider this group of OutlinableRegions
85 /// for extraction.
86 bool IgnoreGroup = false;
87
88 /// The return blocks for the overall function.
90
91 /// The PHIBlocks with their corresponding return block based on the return
92 /// value as the key.
94
95 /// A set containing the different GVN store sets needed. Each array contains
96 /// a sorted list of the different values that need to be stored into output
97 /// registers.
99
100 /// Flag for whether the \ref ArgumentTypes have been defined after the
101 /// extraction of the first region.
102 bool InputTypesSet = false;
103
104 /// The number of input values in \ref ArgumentTypes. Anything after this
105 /// index in ArgumentTypes is an output argument.
106 unsigned NumAggregateInputs = 0;
107
108 /// The mapping of the canonical numbering of the values in outlined sections
109 /// to specific arguments.
111
112 /// The number of branches in the region target a basic block that is outside
113 /// of the region.
114 unsigned BranchesToOutside = 0;
115
116 /// Tracker counting backwards from the highest unsigned value possible to
117 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
118 /// -2 and -1 are assigned by the DenseMap.
119 unsigned PHINodeGVNTracker = -3;
120
122 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
125
126 /// The number of instructions that will be outlined by extracting \ref
127 /// Regions.
129 /// The number of added instructions needed for the outlining of the \ref
130 /// Regions.
132
133 /// The argument that needs to be marked with the swifterr attribute. If not
134 /// needed, there is no value.
135 std::optional<unsigned> SwiftErrorArgument;
136
137 /// For the \ref Regions, we look at every Value. If it is a constant,
138 /// we check whether it is the same in Region.
139 ///
140 /// \param [in,out] NotSame contains the global value numbers where the
141 /// constant is not always the same, and must be passed in as an argument.
143
144 /// For the regions, look at each set of GVN stores needed and account for
145 /// each combination. Add an argument to the argument types if there is
146 /// more than one combination.
147 ///
148 /// \param [in] M - The module we are outlining from.
150};
151
152/// Move the contents of \p SourceBB to before the last instruction of \p
153/// TargetBB.
154/// \param SourceBB - the BasicBlock to pull Instructions from.
155/// \param TargetBB - the BasicBlock to put Instruction into.
156static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
157 TargetBB.splice(TargetBB.end(), &SourceBB);
158}
159
160/// A function to sort the keys of \p Map, which must be a mapping of constant
161/// values to basic blocks and return it in \p SortedKeys
162///
163/// \param SortedKeys - The vector the keys will be return in and sorted.
164/// \param Map - The DenseMap containing keys to sort.
165static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
167 for (auto &VtoBB : Map)
168 SortedKeys.push_back(VtoBB.first);
169
170 // Here we expect to have either 1 value that is void (nullptr) or multiple
171 // values that are all constant integers.
172 if (SortedKeys.size() == 1) {
173 assert(!SortedKeys[0] && "Expected a single void value.");
174 return;
175 }
176
177 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
178 assert(LHS && RHS && "Expected non void values.");
179 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
180 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
181
182 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
183 });
184}
185
187 Value *V) {
188 std::optional<unsigned> GVN = Candidate->getGVN(V);
189 assert(GVN && "No GVN for incoming value");
190 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
191 std::optional<unsigned> FirstGVN =
192 Other.Candidate->fromCanonicalNum(*CanonNum);
193 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
194 return FoundValueOpt.value_or(nullptr);
195}
196
199 BasicBlock *BB) {
200 Instruction *FirstNonPHI = BB->getFirstNonPHIOrDbg();
201 assert(FirstNonPHI && "block is empty?");
202 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
203 if (!CorrespondingVal)
204 return nullptr;
205 BasicBlock *CorrespondingBlock =
206 cast<Instruction>(CorrespondingVal)->getParent();
207 return CorrespondingBlock;
208}
209
210/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
211/// in \p Included to branch to BasicBlock \p Replace if they currently branch
212/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
213/// when PHINodes are included in outlined regions.
214///
215/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
216/// checked.
217/// \param Find - The successor block to be replaced.
218/// \param Replace - The new succesor block to branch to.
219/// \param Included - The set of blocks about to be outlined.
221 BasicBlock *Replace,
222 DenseSet<BasicBlock *> &Included) {
223 for (PHINode &PN : PHIBlock->phis()) {
224 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
225 ++Idx) {
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 BasicBlock *Incoming = PN.getIncomingBlock(Idx);
229 if (!Included.contains(Incoming))
230 continue;
231
232 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
233 assert(BI && "Not a branch instruction?");
234 // Look over the branching instructions into this block to see if we
235 // used to branch to Find in this outlined block.
236 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
237 Succ++) {
238 // If we have found the block to replace, we do so here.
239 if (BI->getSuccessor(Succ) != Find)
240 continue;
241 BI->setSuccessor(Succ, Replace);
242 }
243 }
244 }
245}
246
247
249 assert(!CandidateSplit && "Candidate already split!");
250
252
253 Instruction *EndInst = nullptr;
254 // Check whether the last instruction is a terminator, if it is, we do
255 // not split on the following instruction. We leave the block as it is. We
256 // also check that this is not the last instruction in the Module, otherwise
257 // the check for whether the current following instruction matches the
258 // previously recorded instruction will be incorrect.
259 if (!BackInst->isTerminator() ||
260 BackInst->getParent() != &BackInst->getFunction()->back()) {
261 EndInst = Candidate->end()->Inst;
262 assert(EndInst && "Expected an end instruction?");
263 }
264
265 // We check if the current instruction following the last instruction in the
266 // region is the same as the recorded instruction following the last
267 // instruction. If they do not match, there could be problems in rewriting
268 // the program after outlining, so we ignore it.
269 if (!BackInst->isTerminator() &&
270 EndInst != BackInst->getNextNonDebugInstruction())
271 return;
272
273 Instruction *StartInst = (*Candidate->begin()).Inst;
274 assert(StartInst && "Expected a start instruction?");
275 StartBB = StartInst->getParent();
276 PrevBB = StartBB;
277
280
281 // We iterate over the instructions in the region, if we find a PHINode, we
282 // check if there are predecessors outside of the region, if there are,
283 // we ignore this region since we are unable to handle the severing of the
284 // phi node right now.
285
286 // TODO: Handle extraneous inputs for PHINodes through variable number of
287 // inputs, similar to how outputs are handled.
288 BasicBlock::iterator It = StartInst->getIterator();
289 EndBB = BackInst->getParent();
290 BasicBlock *IBlock;
291 BasicBlock *PHIPredBlock = nullptr;
292 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
293 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
294 unsigned NumPredsOutsideRegion = 0;
295 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
296 if (!BBSet.contains(PN->getIncomingBlock(i))) {
297 PHIPredBlock = PN->getIncomingBlock(i);
298 ++NumPredsOutsideRegion;
299 continue;
300 }
301
302 // We must consider the case there the incoming block to the PHINode is
303 // the same as the final block of the OutlinableRegion. If this is the
304 // case, the branch from this block must also be outlined to be valid.
305 IBlock = PN->getIncomingBlock(i);
306 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
307 PHIPredBlock = PN->getIncomingBlock(i);
308 ++NumPredsOutsideRegion;
309 }
310 }
311
312 if (NumPredsOutsideRegion > 1)
313 return;
314
315 It++;
316 }
317
318 // If the region starts with a PHINode, but is not the initial instruction of
319 // the BasicBlock, we ignore this region for now.
320 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
321 return;
322
323 // If the region ends with a PHINode, but does not contain all of the phi node
324 // instructions of the region, we ignore it for now.
325 if (isa<PHINode>(BackInst) &&
326 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
327 return;
328
329 // The basic block gets split like so:
330 // block: block:
331 // inst1 inst1
332 // inst2 inst2
333 // region1 br block_to_outline
334 // region2 block_to_outline:
335 // region3 -> region1
336 // region4 region2
337 // inst3 region3
338 // inst4 region4
339 // br block_after_outline
340 // block_after_outline:
341 // inst3
342 // inst4
343
344 std::string OriginalName = PrevBB->getName().str();
345
346 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
348 // If there was a PHINode with an incoming block outside the region,
349 // make sure is correctly updated in the newly split block.
350 if (PHIPredBlock)
352
353 CandidateSplit = true;
354 if (!BackInst->isTerminator()) {
355 EndBB = EndInst->getParent();
356 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
359 } else {
360 EndBB = BackInst->getParent();
361 EndsInBranch = true;
362 FollowBB = nullptr;
363 }
364
365 // Refind the basic block set.
366 BBSet.clear();
368 // For the phi nodes in the new starting basic block of the region, we
369 // reassign the targets of the basic blocks branching instructions.
371 if (FollowBB)
373}
374
376 assert(CandidateSplit && "Candidate is not split!");
377
378 // The basic block gets reattached like so:
379 // block: block:
380 // inst1 inst1
381 // inst2 inst2
382 // br block_to_outline region1
383 // block_to_outline: -> region2
384 // region1 region3
385 // region2 region4
386 // region3 inst3
387 // region4 inst4
388 // br block_after_outline
389 // block_after_outline:
390 // inst3
391 // inst4
392 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
393
394 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
395 // Make sure PHINode references to the block we are merging into are
396 // updated to be incoming blocks from the predecessor to the current block.
397
398 // NOTE: If this is updated such that the outlined block can have more than
399 // one incoming block to a PHINode, this logic will have to updated
400 // to handle multiple precessors instead.
401
402 // We only need to update this if the outlined section contains a PHINode, if
403 // it does not, then the incoming block was never changed in the first place.
404 // On the other hand, if PrevBB has no predecessors, it means that all
405 // incoming blocks to the first block are contained in the region, and there
406 // will be nothing to update.
407 Instruction *StartInst = (*Candidate->begin()).Inst;
408 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
410 "PrevBB has more than one predecessor. Should be 0 or 1.");
411 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
413 }
415
416 // If we reattaching after outlining, we iterate over the phi nodes to
417 // the initial block, and reassign the branch instructions of the incoming
418 // blocks to the block we are remerging into.
419 if (!ExtractedFunction) {
422
424 if (!EndsInBranch)
426 }
427
429
430 BasicBlock *PlacementBB = PrevBB;
431 if (StartBB != EndBB)
432 PlacementBB = EndBB;
433 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
434 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
435 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
436 PlacementBB->getTerminator()->eraseFromParent();
437 moveBBContents(*FollowBB, *PlacementBB);
438 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
440 }
441
444
445 // Make sure to save changes back to the StartBB.
446 StartBB = PrevBB;
447 EndBB = nullptr;
448 PrevBB = nullptr;
449 FollowBB = nullptr;
450
451 CandidateSplit = false;
452}
453
454/// Find whether \p V matches the Constants previously found for the \p GVN.
455///
456/// \param V - The value to check for consistency.
457/// \param GVN - The global value number assigned to \p V.
458/// \param GVNToConstant - The mapping of global value number to Constants.
459/// \returns true if the Value matches the Constant mapped to by V and false if
460/// it \p V is a Constant but does not match.
461/// \returns std::nullopt if \p V is not a Constant.
462static std::optional<bool>
463constantMatches(Value *V, unsigned GVN,
464 DenseMap<unsigned, Constant *> &GVNToConstant) {
465 // See if we have a constants
466 Constant *CST = dyn_cast<Constant>(V);
467 if (!CST)
468 return std::nullopt;
469
470 // Holds a mapping from a global value number to a Constant.
472 bool Inserted;
473
474
475 // If we have a constant, try to make a new entry in the GVNToConstant.
476 std::tie(GVNToConstantIt, Inserted) =
477 GVNToConstant.insert(std::make_pair(GVN, CST));
478 // If it was found and is not equal, it is not the same. We do not
479 // handle this case yet, and exit early.
480 if (Inserted || (GVNToConstantIt->second == CST))
481 return true;
482
483 return false;
484}
485
487 InstructionCost Benefit = 0;
488
489 // Estimate the benefit of outlining a specific sections of the program. We
490 // delegate mostly this task to the TargetTransformInfo so that if the target
491 // has specific changes, we can have a more accurate estimate.
492
493 // However, getInstructionCost delegates the code size calculation for
494 // arithmetic instructions to getArithmeticInstrCost in
495 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
496 // code size for a division and remainder instruction to be equal to 4, and
497 // everything else to 1. This is not an accurate representation of the
498 // division instruction for targets that have a native division instruction.
499 // To be overly conservative, we only add 1 to the number of instructions for
500 // each division instruction.
501 for (IRInstructionData &ID : *Candidate) {
502 Instruction *I = ID.Inst;
503 switch (I->getOpcode()) {
504 case Instruction::FDiv:
505 case Instruction::FRem:
506 case Instruction::SDiv:
507 case Instruction::SRem:
508 case Instruction::UDiv:
509 case Instruction::URem:
510 Benefit += 1;
511 break;
512 default:
514 break;
515 }
516 }
517
518 return Benefit;
519}
520
521/// Check the \p OutputMappings structure for value \p Input, if it exists
522/// it has been used as an output for outlining, and has been renamed, and we
523/// return the new value, otherwise, we return the same value.
524///
525/// \param OutputMappings [in] - The mapping of values to their renamed value
526/// after being used as an output for an outlined region.
527/// \param Input [in] - The value to find the remapped value of, if it exists.
528/// \return The remapped value if it has been renamed, and the same value if has
529/// not.
531 Value *Input) {
533 OutputMappings.find(Input);
534 if (OutputMapping != OutputMappings.end())
535 return OutputMapping->second;
536 return Input;
537}
538
539/// Find whether \p Region matches the global value numbering to Constant
540/// mapping found so far.
541///
542/// \param Region - The OutlinableRegion we are checking for constants
543/// \param GVNToConstant - The mapping of global value number to Constants.
544/// \param NotSame - The set of global value numbers that do not have the same
545/// constant in each region.
546/// \returns true if all Constants are the same in every use of a Constant in \p
547/// Region and false if not
548static bool
550 DenseMap<unsigned, Constant *> &GVNToConstant,
551 DenseSet<unsigned> &NotSame) {
552 bool ConstantsTheSame = true;
553
554 IRSimilarityCandidate &C = *Region.Candidate;
555 for (IRInstructionData &ID : C) {
556
557 // Iterate over the operands in an instruction. If the global value number,
558 // assigned by the IRSimilarityCandidate, has been seen before, we check if
559 // the number has been found to be not the same value in each instance.
560 for (Value *V : ID.OperVals) {
561 std::optional<unsigned> GVNOpt = C.getGVN(V);
562 assert(GVNOpt && "Expected a GVN for operand?");
563 unsigned GVN = *GVNOpt;
564
565 // Check if this global value has been found to not be the same already.
566 if (NotSame.contains(GVN)) {
567 if (isa<Constant>(V))
568 ConstantsTheSame = false;
569 continue;
570 }
571
572 // If it has been the same so far, we check the value for if the
573 // associated Constant value match the previous instances of the same
574 // global value number. If the global value does not map to a Constant,
575 // it is considered to not be the same value.
576 std::optional<bool> ConstantMatches =
577 constantMatches(V, GVN, GVNToConstant);
578 if (ConstantMatches) {
579 if (*ConstantMatches)
580 continue;
581 else
582 ConstantsTheSame = false;
583 }
584
585 // While this value is a register, it might not have been previously,
586 // make sure we don't already have a constant mapped to this global value
587 // number.
588 if (GVNToConstant.contains(GVN))
589 ConstantsTheSame = false;
590
591 NotSame.insert(GVN);
592 }
593 }
594
595 return ConstantsTheSame;
596}
597
599 DenseMap<unsigned, Constant *> GVNToConstant;
600
601 for (OutlinableRegion *Region : Regions)
602 collectRegionsConstants(*Region, GVNToConstant, NotSame);
603}
604
606 for (OutlinableRegion *OS : Regions)
607 OutputGVNCombinations.insert(OS->GVNStores);
608
609 // We are adding an extracted argument to decide between which output path
610 // to use in the basic block. It is used in a switch statement and only
611 // needs to be an integer.
612 if (OutputGVNCombinations.size() > 1)
613 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
614}
615
616/// Get the subprogram if it exists for one of the outlined regions.
617///
618/// \param [in] Group - The set of regions to find a subprogram for.
619/// \returns the subprogram if it exists, or nullptr.
621 for (OutlinableRegion *OS : Group.Regions)
622 if (Function *F = OS->Call->getFunction())
623 if (DISubprogram *SP = F->getSubprogram())
624 return SP;
625
626 return nullptr;
627}
628
629Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
630 unsigned FunctionNameSuffix) {
631 assert(!Group.OutlinedFunction && "Function is already defined!");
632
633 Type *RetTy = Type::getVoidTy(M.getContext());
634 // All extracted functions _should_ have the same return type at this point
635 // since the similarity identifier ensures that all branches outside of the
636 // region occur in the same place.
637
638 // NOTE: Should we ever move to the model that uses a switch at every point
639 // needed, meaning that we could branch within the region or out, it is
640 // possible that we will need to switch to using the most general case all of
641 // the time.
642 for (OutlinableRegion *R : Group.Regions) {
643 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
644 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
645 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
646 RetTy = ExtractedFuncType;
647 }
648
650 RetTy, Group.ArgumentTypes, false);
651
652 // These functions will only be called from within the same module, so
653 // we can set an internal linkage.
656 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
657
658 // Transfer the swifterr attribute to the correct function parameter.
659 if (Group.SwiftErrorArgument)
661 Attribute::SwiftError);
662
663 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
664 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
665
666 // If there's a DISubprogram associated with this outlined function, then
667 // emit debug info for the outlined function.
668 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
669 Function *F = Group.OutlinedFunction;
670 // We have a DISubprogram. Get its DICompileUnit.
671 DICompileUnit *CU = SP->getUnit();
672 DIBuilder DB(M, true, CU);
673 DIFile *Unit = SP->getFile();
674 Mangler Mg;
675 // Get the mangled name of the function for the linkage name.
676 std::string Dummy;
677 llvm::raw_string_ostream MangledNameStream(Dummy);
678 Mg.getNameWithPrefix(MangledNameStream, F, false);
679
680 DISubprogram *OutlinedSP = DB.createFunction(
681 Unit /* Context */, F->getName(), Dummy, Unit /* File */,
682 0 /* Line 0 is reserved for compiler-generated code. */,
683 DB.createSubroutineType(DB.getOrCreateTypeArray({})), /* void type */
684 0, /* Line 0 is reserved for compiler-generated code. */
685 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
686 /* Outlined code is optimized code by definition. */
687 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
688
689 // Don't add any new variables to the subprogram.
690 DB.finalizeSubprogram(OutlinedSP);
691
692 // Attach subprogram to the function.
693 F->setSubprogram(OutlinedSP);
694 // We're done with the DIBuilder.
695 DB.finalize();
696 }
697
698 return Group.OutlinedFunction;
699}
700
701/// Move each BasicBlock in \p Old to \p New.
702///
703/// \param [in] Old - The function to move the basic blocks from.
704/// \param [in] New - The function to move the basic blocks to.
705/// \param [out] NewEnds - The return blocks of the new overall function.
706static void moveFunctionData(Function &Old, Function &New,
708 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
709 CurrBB.removeFromParent();
710 CurrBB.insertInto(&New);
711 Instruction *I = CurrBB.getTerminator();
712
713 // For each block we find a return instruction is, it is a potential exit
714 // path for the function. We keep track of each block based on the return
715 // value here.
716 if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
717 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
718
719 std::vector<Instruction *> DebugInsts;
720
721 for (Instruction &Val : CurrBB) {
722 // Since debug-info originates from many different locations in the
723 // program, it will cause incorrect reporting from a debugger if we keep
724 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
725 // here, collect intrinsics for removal later.
726 Val.dropDbgRecords();
727
728 // We must handle the scoping of called functions differently than
729 // other outlined instructions.
730 if (!isa<CallInst>(&Val)) {
731 // Remove the debug information for outlined functions.
732 Val.setDebugLoc(DebugLoc());
733
734 // Loop info metadata may contain line locations. Update them to have no
735 // value in the new subprogram since the outlined code could be from
736 // several locations.
737 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
738 if (DISubprogram *SP = New.getSubprogram())
739 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
740 return DILocation::get(New.getContext(), Loc->getLine(),
741 Loc->getColumn(), SP, nullptr);
742 return MD;
743 };
744 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
745 continue;
746 }
747
748 // From this point we are only handling call instructions.
749 CallInst *CI = cast<CallInst>(&Val);
750
751 // Collect debug intrinsics for later removal.
752 if (isa<DbgInfoIntrinsic>(CI)) {
753 DebugInsts.push_back(&Val);
754 continue;
755 }
756
757 // Edit the scope of called functions inside of outlined functions.
758 if (DISubprogram *SP = New.getSubprogram()) {
759 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
760 Val.setDebugLoc(DI);
761 }
762 }
763
764 for (Instruction *I : DebugInsts)
765 I->eraseFromParent();
766 }
767}
768
769/// Find the constants that will need to be lifted into arguments
770/// as they are not the same in each instance of the region.
771///
772/// \param [in] C - The IRSimilarityCandidate containing the region we are
773/// analyzing.
774/// \param [in] NotSame - The set of global value numbers that do not have a
775/// single Constant across all OutlinableRegions similar to \p C.
776/// \param [out] Inputs - The list containing the global value numbers of the
777/// arguments needed for the region of code.
779 std::vector<unsigned> &Inputs) {
781 // Iterate over the instructions, and find what constants will need to be
782 // extracted into arguments.
783 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
784 IDIt != EndIDIt; IDIt++) {
785 for (Value *V : (*IDIt).OperVals) {
786 // Since these are stored before any outlining, they will be in the
787 // global value numbering.
788 unsigned GVN = *C.getGVN(V);
789 if (isa<Constant>(V))
790 if (NotSame.contains(GVN) && Seen.insert(GVN).second)
791 Inputs.push_back(GVN);
792 }
793 }
794}
795
796/// Find the GVN for the inputs that have been found by the CodeExtractor.
797///
798/// \param [in] C - The IRSimilarityCandidate containing the region we are
799/// analyzing.
800/// \param [in] CurrentInputs - The set of inputs found by the
801/// CodeExtractor.
802/// \param [in] OutputMappings - The mapping of values that have been replaced
803/// by a new output value.
804/// \param [out] EndInputNumbers - The global value numbers for the extracted
805/// arguments.
807 SetVector<Value *> &CurrentInputs,
808 const DenseMap<Value *, Value *> &OutputMappings,
809 std::vector<unsigned> &EndInputNumbers) {
810 // Get the Global Value Number for each input. We check if the Value has been
811 // replaced by a different value at output, and use the original value before
812 // replacement.
813 for (Value *Input : CurrentInputs) {
814 assert(Input && "Have a nullptr as an input");
815 auto It = OutputMappings.find(Input);
816 if (It != OutputMappings.end())
817 Input = It->second;
818 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
819 EndInputNumbers.push_back(*C.getGVN(Input));
820 }
821}
822
823/// Find the original value for the \p ArgInput values if any one of them was
824/// replaced during a previous extraction.
825///
826/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
827/// \param [in] OutputMappings - The mapping of values that have been replaced
828/// by a new output value.
829/// \param [out] RemappedArgInputs - The remapped values according to
830/// \p OutputMappings that will be extracted.
831static void
833 const DenseMap<Value *, Value *> &OutputMappings,
834 SetVector<Value *> &RemappedArgInputs) {
835 // Get the global value number for each input that will be extracted as an
836 // argument by the code extractor, remapping if needed for reloaded values.
837 for (Value *Input : ArgInputs) {
838 auto It = OutputMappings.find(Input);
839 if (It != OutputMappings.end())
840 Input = It->second;
841 RemappedArgInputs.insert(Input);
842 }
843}
844
845/// Find the input GVNs and the output values for a region of Instructions.
846/// Using the code extractor, we collect the inputs to the extracted function.
847///
848/// The \p Region can be identified as needing to be ignored in this function.
849/// It should be checked whether it should be ignored after a call to this
850/// function.
851///
852/// \param [in,out] Region - The region of code to be analyzed.
853/// \param [out] InputGVNs - The global value numbers for the extracted
854/// arguments.
855/// \param [in] NotSame - The global value numbers in the region that do not
856/// have the same constant value in the regions structurally similar to
857/// \p Region.
858/// \param [in] OutputMappings - The mapping of values that have been replaced
859/// by a new output value after extraction.
860/// \param [out] ArgInputs - The values of the inputs to the extracted function.
861/// \param [out] Outputs - The set of values extracted by the CodeExtractor
862/// as outputs.
864 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
865 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
866 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
867 IRSimilarityCandidate &C = *Region.Candidate;
868
869 // OverallInputs are the inputs to the region found by the CodeExtractor,
870 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
871 // allocas of values whose lifetimes are contained completely within the
872 // outlined region. PremappedInputs are the arguments found by the
873 // CodeExtractor, removing conditions such as sunken allocas, but that
874 // may need to be remapped due to the extracted output values replacing
875 // the original values. We use DummyOutputs for this first run of finding
876 // inputs and outputs since the outputs could change during findAllocas,
877 // the correct set of extracted outputs will be in the final Outputs ValueSet.
878 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
879 DummyOutputs;
880
881 // Use the code extractor to get the inputs and outputs, without sunken
882 // allocas or removing llvm.assumes.
883 CodeExtractor *CE = Region.CE;
884 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
885 assert(Region.StartBB && "Region must have a start BasicBlock!");
886 Function *OrigF = Region.StartBB->getParent();
887 CodeExtractorAnalysisCache CEAC(*OrigF);
888 BasicBlock *Dummy = nullptr;
889
890 // The region may be ineligible due to VarArgs in the parent function. In this
891 // case we ignore the region.
892 if (!CE->isEligible()) {
893 Region.IgnoreRegion = true;
894 return;
895 }
896
897 // Find if any values are going to be sunk into the function when extracted
898 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
899 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
900
901 // TODO: Support regions with sunken allocas: values whose lifetimes are
902 // contained completely within the outlined region. These are not guaranteed
903 // to be the same in every region, so we must elevate them all to arguments
904 // when they appear. If these values are not equal, it means there is some
905 // Input in OverallInputs that was removed for ArgInputs.
906 if (OverallInputs.size() != PremappedInputs.size()) {
907 Region.IgnoreRegion = true;
908 return;
909 }
910
911 findConstants(C, NotSame, InputGVNs);
912
913 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
914
915 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
916 ArgInputs);
917
918 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
919 // we need to make sure they are in a deterministic order.
920 stable_sort(InputGVNs);
921}
922
923/// Look over the inputs and map each input argument to an argument in the
924/// overall function for the OutlinableRegions. This creates a way to replace
925/// the arguments of the extracted function with the arguments of the new
926/// overall function.
927///
928/// \param [in,out] Region - The region of code to be analyzed.
929/// \param [in] InputGVNs - The global value numbering of the input values
930/// collected.
931/// \param [in] ArgInputs - The values of the arguments to the extracted
932/// function.
933static void
935 std::vector<unsigned> &InputGVNs,
936 SetVector<Value *> &ArgInputs) {
937
938 IRSimilarityCandidate &C = *Region.Candidate;
939 OutlinableGroup &Group = *Region.Parent;
940
941 // This counts the argument number in the overall function.
942 unsigned TypeIndex = 0;
943
944 // This counts the argument number in the extracted function.
945 unsigned OriginalIndex = 0;
946
947 // Find the mapping of the extracted arguments to the arguments for the
948 // overall function. Since there may be extra arguments in the overall
949 // function to account for the extracted constants, we have two different
950 // counters as we find extracted arguments, and as we come across overall
951 // arguments.
952
953 // Additionally, in our first pass, for the first extracted function,
954 // we find argument locations for the canonical value numbering. This
955 // numbering overrides any discovered location for the extracted code.
956 for (unsigned InputVal : InputGVNs) {
957 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
958 assert(CanonicalNumberOpt && "Canonical number not found?");
959 unsigned CanonicalNumber = *CanonicalNumberOpt;
960
961 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
962 assert(InputOpt && "Global value number not found?");
963 Value *Input = *InputOpt;
964
966 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
967
968 if (!Group.InputTypesSet) {
969 Group.ArgumentTypes.push_back(Input->getType());
970 // If the input value has a swifterr attribute, make sure to mark the
971 // argument in the overall function.
972 if (Input->isSwiftError()) {
973 assert(
974 !Group.SwiftErrorArgument &&
975 "Argument already marked with swifterr for this OutlinableGroup!");
976 Group.SwiftErrorArgument = TypeIndex;
977 }
978 }
979
980 // Check if we have a constant. If we do add it to the overall argument
981 // number to Constant map for the region, and continue to the next input.
982 if (Constant *CST = dyn_cast<Constant>(Input)) {
983 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
984 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
985 else {
987 std::make_pair(CanonicalNumber, TypeIndex));
988 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
989 }
990 TypeIndex++;
991 continue;
992 }
993
994 // It is not a constant, we create the mapping from extracted argument list
995 // to the overall argument list, using the canonical location, if it exists.
996 assert(ArgInputs.count(Input) && "Input cannot be found!");
997
998 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
999 if (OriginalIndex != AggArgIt->second)
1000 Region.ChangedArgOrder = true;
1001 Region.ExtractedArgToAgg.insert(
1002 std::make_pair(OriginalIndex, AggArgIt->second));
1003 Region.AggArgToExtracted.insert(
1004 std::make_pair(AggArgIt->second, OriginalIndex));
1005 } else {
1007 std::make_pair(CanonicalNumber, TypeIndex));
1008 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
1009 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
1010 }
1011 OriginalIndex++;
1012 TypeIndex++;
1013 }
1014
1015 // If the function type definitions for the OutlinableGroup holding the region
1016 // have not been set, set the length of the inputs here. We should have the
1017 // same inputs for all of the different regions contained in the
1018 // OutlinableGroup since they are all structurally similar to one another.
1019 if (!Group.InputTypesSet) {
1020 Group.NumAggregateInputs = TypeIndex;
1021 Group.InputTypesSet = true;
1022 }
1023
1024 Region.NumExtractedInputs = OriginalIndex;
1025}
1026
1027/// Check if the \p V has any uses outside of the region other than \p PN.
1028///
1029/// \param V [in] - The value to check.
1030/// \param PHILoc [in] - The location in the PHINode of \p V.
1031/// \param PN [in] - The PHINode using \p V.
1032/// \param Exits [in] - The potential blocks we exit to from the outlined
1033/// region.
1034/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1035/// \returns true if \p V has any use soutside its region other than \p PN.
1036static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1038 DenseSet<BasicBlock *> &BlocksInRegion) {
1039 // We check to see if the value is used by the PHINode from some other
1040 // predecessor not included in the region. If it is, we make sure
1041 // to keep it as an output.
1042 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1043 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1044 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1045 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1046 }))
1047 return true;
1048
1049 // Check if the value is used by any other instructions outside the region.
1050 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1051 Instruction *I = dyn_cast<Instruction>(U);
1052 if (!I)
1053 return false;
1054
1055 // If the use of the item is inside the region, we skip it. Uses
1056 // inside the region give us useful information about how the item could be
1057 // used as an output.
1058 BasicBlock *Parent = I->getParent();
1059 if (BlocksInRegion.contains(Parent))
1060 return false;
1061
1062 // If it's not a PHINode then we definitely know the use matters. This
1063 // output value will not completely combined with another item in a PHINode
1064 // as it is directly reference by another non-phi instruction
1065 if (!isa<PHINode>(I))
1066 return true;
1067
1068 // If we have a PHINode outside one of the exit locations, then it
1069 // can be considered an outside use as well. If there is a PHINode
1070 // contained in the Exit where this values use matters, it will be
1071 // caught when we analyze that PHINode.
1072 if (!Exits.contains(Parent))
1073 return true;
1074
1075 return false;
1076 });
1077}
1078
1079/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1080/// considered outputs. A PHINodes is an output when more than one incoming
1081/// value has been marked by the CodeExtractor as an output.
1082///
1083/// \param CurrentExitFromRegion [in] - The block to analyze.
1084/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1085/// region.
1086/// \param RegionBlocks [in] - The basic blocks in the region.
1087/// \param Outputs [in, out] - The existing outputs for the region, we may add
1088/// PHINodes to this as we find that they replace output values.
1089/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1090/// totally replaced by a PHINode.
1091/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1092/// in PHINodes, but have other uses, and should still be considered outputs.
1094 BasicBlock *CurrentExitFromRegion,
1095 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1096 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1097 DenseSet<Value *> &OutputsReplacedByPHINode,
1098 DenseSet<Value *> &OutputsWithNonPhiUses) {
1099 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1100 // Find all incoming values from the outlining region.
1101 SmallVector<unsigned, 2> IncomingVals;
1102 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1103 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1104 IncomingVals.push_back(I);
1105
1106 // Do not process PHI if there are no predecessors from region.
1107 unsigned NumIncomingVals = IncomingVals.size();
1108 if (NumIncomingVals == 0)
1109 continue;
1110
1111 // If there is one predecessor, we mark it as a value that needs to be kept
1112 // as an output.
1113 if (NumIncomingVals == 1) {
1114 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1115 OutputsWithNonPhiUses.insert(V);
1116 OutputsReplacedByPHINode.erase(V);
1117 continue;
1118 }
1119
1120 // This PHINode will be used as an output value, so we add it to our list.
1121 Outputs.insert(&PN);
1122
1123 // Not all of the incoming values should be ignored as other inputs and
1124 // outputs may have uses in outlined region. If they have other uses
1125 // outside of the single PHINode we should not skip over it.
1126 for (unsigned Idx : IncomingVals) {
1127 Value *V = PN.getIncomingValue(Idx);
1128 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1129 OutputsWithNonPhiUses.insert(V);
1130 OutputsReplacedByPHINode.erase(V);
1131 continue;
1132 }
1133 if (!OutputsWithNonPhiUses.contains(V))
1134 OutputsReplacedByPHINode.insert(V);
1135 }
1136 }
1137}
1138
1139// Represents the type for the unsigned number denoting the output number for
1140// phi node, along with the canonical number for the exit block.
1141using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1142// The list of canonical numbers for the incoming values to a PHINode.
1144// The pair type representing the set of canonical values being combined in the
1145// PHINode, along with the location data for the PHINode.
1146using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1147
1148/// Encode \p PND as an integer for easy lookup based on the argument location,
1149/// the parent BasicBlock canonical numbering, and the canonical numbering of
1150/// the values stored in the PHINode.
1151///
1152/// \param PND - The data to hash.
1153/// \returns The hash code of \p PND.
1155 return llvm::hash_combine(
1156 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1157 llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1158}
1159
1160/// Create a special GVN for PHINodes that will be used outside of
1161/// the region. We create a hash code based on the Canonical number of the
1162/// parent BasicBlock, the canonical numbering of the values stored in the
1163/// PHINode and the aggregate argument location. This is used to find whether
1164/// this PHINode type has been given a canonical numbering already. If not, we
1165/// assign it a value and store it for later use. The value is returned to
1166/// identify different output schemes for the set of regions.
1167///
1168/// \param Region - The region that \p PN is an output for.
1169/// \param PN - The PHINode we are analyzing.
1170/// \param Blocks - The blocks for the region we are analyzing.
1171/// \param AggArgIdx - The argument \p PN will be stored into.
1172/// \returns An optional holding the assigned canonical number, or std::nullopt
1173/// if there is some attribute of the PHINode blocking it from being used.
1174static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1175 PHINode *PN,
1177 unsigned AggArgIdx) {
1178 OutlinableGroup &Group = *Region.Parent;
1179 IRSimilarityCandidate &Cand = *Region.Candidate;
1180 BasicBlock *PHIBB = PN->getParent();
1181 CanonList PHIGVNs;
1182 Value *Incoming;
1183 BasicBlock *IncomingBlock;
1184 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1186 IncomingBlock = PN->getIncomingBlock(Idx);
1187 // If we cannot find a GVN, and the incoming block is included in the region
1188 // this means that the input to the PHINode is not included in the region we
1189 // are trying to analyze, meaning, that if it was outlined, we would be
1190 // adding an extra input. We ignore this case for now, and so ignore the
1191 // region.
1192 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1193 if (!OGVN && Blocks.contains(IncomingBlock)) {
1194 Region.IgnoreRegion = true;
1195 return std::nullopt;
1196 }
1197
1198 // If the incoming block isn't in the region, we don't have to worry about
1199 // this incoming value.
1200 if (!Blocks.contains(IncomingBlock))
1201 continue;
1202
1203 // Collect the canonical numbers of the values in the PHINode.
1204 unsigned GVN = *OGVN;
1205 OGVN = Cand.getCanonicalNum(GVN);
1206 assert(OGVN && "No GVN found for incoming value?");
1207 PHIGVNs.push_back(*OGVN);
1208
1209 // Find the incoming block and use the canonical numbering as well to define
1210 // the hash for the PHINode.
1211 OGVN = Cand.getGVN(IncomingBlock);
1212
1213 // If there is no number for the incoming block, it is because we have
1214 // split the candidate basic blocks. So we use the previous block that it
1215 // was split from to find the valid global value numbering for the PHINode.
1216 if (!OGVN) {
1217 assert(Cand.getStartBB() == IncomingBlock &&
1218 "Unknown basic block used in exit path PHINode.");
1219
1220 BasicBlock *PrevBlock = nullptr;
1221 // Iterate over the predecessors to the incoming block of the
1222 // PHINode, when we find a block that is not contained in the region
1223 // we know that this is the first block that we split from, and should
1224 // have a valid global value numbering.
1225 for (BasicBlock *Pred : predecessors(IncomingBlock))
1226 if (!Blocks.contains(Pred)) {
1227 PrevBlock = Pred;
1228 break;
1229 }
1230 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1231 OGVN = Cand.getGVN(PrevBlock);
1232 }
1233 GVN = *OGVN;
1234 OGVN = Cand.getCanonicalNum(GVN);
1235 assert(OGVN && "No GVN found for incoming block?");
1236 PHIGVNs.push_back(*OGVN);
1237 }
1238
1239 // Now that we have the GVNs for the incoming values, we are going to combine
1240 // them with the GVN of the incoming bock, and the output location of the
1241 // PHINode to generate a hash value representing this instance of the PHINode.
1244 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1245 assert(BBGVN && "Could not find GVN for the incoming block!");
1246
1247 BBGVN = Cand.getCanonicalNum(*BBGVN);
1248 assert(BBGVN && "Could not find canonical number for the incoming block!");
1249 // Create a pair of the exit block canonical value, and the aggregate
1250 // argument location, connected to the canonical numbers stored in the
1251 // PHINode.
1252 PHINodeData TemporaryPair =
1253 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1254 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1255
1256 // Look for and create a new entry in our connection between canonical
1257 // numbers for PHINodes, and the set of objects we just created.
1258 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1259 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1260 bool Inserted = false;
1261 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1262 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1263 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1264 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1265 }
1266
1267 return GVNToPHIIt->second;
1268}
1269
1270/// Create a mapping of the output arguments for the \p Region to the output
1271/// arguments of the overall outlined function.
1272///
1273/// \param [in,out] Region - The region of code to be analyzed.
1274/// \param [in] Outputs - The values found by the code extractor.
1275static void
1277 SetVector<Value *> &Outputs) {
1278 OutlinableGroup &Group = *Region.Parent;
1279 IRSimilarityCandidate &C = *Region.Candidate;
1280
1282 DenseSet<BasicBlock *> BlocksInRegion;
1283 C.getBasicBlocks(BlocksInRegion, BE);
1284
1285 // Find the exits to the region.
1287 for (BasicBlock *Block : BE)
1288 for (BasicBlock *Succ : successors(Block))
1289 if (!BlocksInRegion.contains(Succ))
1290 Exits.insert(Succ);
1291
1292 // After determining which blocks exit to PHINodes, we add these PHINodes to
1293 // the set of outputs to be processed. We also check the incoming values of
1294 // the PHINodes for whether they should no longer be considered outputs.
1295 DenseSet<Value *> OutputsReplacedByPHINode;
1296 DenseSet<Value *> OutputsWithNonPhiUses;
1297 for (BasicBlock *ExitBB : Exits)
1298 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1299 OutputsReplacedByPHINode,
1300 OutputsWithNonPhiUses);
1301
1302 // This counts the argument number in the extracted function.
1303 unsigned OriginalIndex = Region.NumExtractedInputs;
1304
1305 // This counts the argument number in the overall function.
1306 unsigned TypeIndex = Group.NumAggregateInputs;
1307 bool TypeFound;
1308 DenseSet<unsigned> AggArgsUsed;
1309
1310 // Iterate over the output types and identify if there is an aggregate pointer
1311 // type whose base type matches the current output type. If there is, we mark
1312 // that we will use this output register for this value. If not we add another
1313 // type to the overall argument type list. We also store the GVNs used for
1314 // stores to identify which values will need to be moved into an special
1315 // block that holds the stores to the output registers.
1316 for (Value *Output : Outputs) {
1317 TypeFound = false;
1318 // We can do this since it is a result value, and will have a number
1319 // that is necessarily the same. BUT if in the future, the instructions
1320 // do not have to be in same order, but are functionally the same, we will
1321 // have to use a different scheme, as one-to-one correspondence is not
1322 // guaranteed.
1323 unsigned ArgumentSize = Group.ArgumentTypes.size();
1324
1325 // If the output is combined in a PHINode, we make sure to skip over it.
1326 if (OutputsReplacedByPHINode.contains(Output))
1327 continue;
1328
1329 unsigned AggArgIdx = 0;
1330 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1331 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1332 continue;
1333
1334 if (!AggArgsUsed.insert(Jdx).second)
1335 continue;
1336
1337 TypeFound = true;
1338 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1339 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1340 AggArgIdx = Jdx;
1341 break;
1342 }
1343
1344 // We were unable to find an unused type in the output type set that matches
1345 // the output, so we add a pointer type to the argument types of the overall
1346 // function to handle this output and create a mapping to it.
1347 if (!TypeFound) {
1348 Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
1349 M.getDataLayout().getAllocaAddrSpace()));
1350 // Mark the new pointer type as the last value in the aggregate argument
1351 // list.
1352 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1353 AggArgsUsed.insert(ArgTypeIdx);
1354 Region.ExtractedArgToAgg.insert(
1355 std::make_pair(OriginalIndex, ArgTypeIdx));
1356 Region.AggArgToExtracted.insert(
1357 std::make_pair(ArgTypeIdx, OriginalIndex));
1358 AggArgIdx = ArgTypeIdx;
1359 }
1360
1361 // TODO: Adapt to the extra input from the PHINode.
1362 PHINode *PN = dyn_cast<PHINode>(Output);
1363
1364 std::optional<unsigned> GVN;
1365 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1366 // Values outside the region can be combined into PHINode when we
1367 // have multiple exits. We collect both of these into a list to identify
1368 // which values are being used in the PHINode. Each list identifies a
1369 // different PHINode, and a different output. We store the PHINode as it's
1370 // own canonical value. These canonical values are also dependent on the
1371 // output argument it is saved to.
1372
1373 // If two PHINodes have the same canonical values, but different aggregate
1374 // argument locations, then they will have distinct Canonical Values.
1375 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1376 if (!GVN)
1377 return;
1378 } else {
1379 // If we do not have a PHINode we use the global value numbering for the
1380 // output value, to find the canonical number to add to the set of stored
1381 // values.
1382 GVN = C.getGVN(Output);
1383 GVN = C.getCanonicalNum(*GVN);
1384 }
1385
1386 // Each region has a potentially unique set of outputs. We save which
1387 // values are output in a list of canonical values so we can differentiate
1388 // among the different store schemes.
1389 Region.GVNStores.push_back(*GVN);
1390
1391 OriginalIndex++;
1392 TypeIndex++;
1393 }
1394
1395 // We sort the stored values to make sure that we are not affected by analysis
1396 // order when determining what combination of items were stored.
1397 stable_sort(Region.GVNStores);
1398}
1399
1400void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1401 DenseSet<unsigned> &NotSame) {
1402 std::vector<unsigned> Inputs;
1403 SetVector<Value *> ArgInputs, Outputs;
1404
1405 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1406 Outputs);
1407
1408 if (Region.IgnoreRegion)
1409 return;
1410
1411 // Map the inputs found by the CodeExtractor to the arguments found for
1412 // the overall function.
1414
1415 // Map the outputs found by the CodeExtractor to the arguments found for
1416 // the overall function.
1418}
1419
1420/// Replace the extracted function in the Region with a call to the overall
1421/// function constructed from the deduplicated similar regions, replacing and
1422/// remapping the values passed to the extracted function as arguments to the
1423/// new arguments of the overall function.
1424///
1425/// \param [in] M - The module to outline from.
1426/// \param [in] Region - The regions of extracted code to be replaced with a new
1427/// function.
1428/// \returns a call instruction with the replaced function.
1430 std::vector<Value *> NewCallArgs;
1432
1433 OutlinableGroup &Group = *Region.Parent;
1434 CallInst *Call = Region.Call;
1435 assert(Call && "Call to replace is nullptr?");
1436 Function *AggFunc = Group.OutlinedFunction;
1437 assert(AggFunc && "Function to replace with is nullptr?");
1438
1439 // If the arguments are the same size, there are not values that need to be
1440 // made into an argument, the argument ordering has not been change, or
1441 // different output registers to handle. We can simply replace the called
1442 // function in this case.
1443 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1444 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1445 << *AggFunc << " with same number of arguments\n");
1446 Call->setCalledFunction(AggFunc);
1447 return Call;
1448 }
1449
1450 // We have a different number of arguments than the new function, so
1451 // we need to use our previously mappings off extracted argument to overall
1452 // function argument, and constants to overall function argument to create the
1453 // new argument list.
1454 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1455
1456 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1457 Group.OutputGVNCombinations.size() > 1) {
1458 // If we are on the last argument, and we need to differentiate between
1459 // output blocks, add an integer to the argument list to determine
1460 // what block to take
1461 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1462 << Region.OutputBlockNum << "\n");
1463 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1464 Region.OutputBlockNum));
1465 continue;
1466 }
1467
1468 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1469 if (ArgPair != Region.AggArgToExtracted.end()) {
1470 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1471 // If we found the mapping from the extracted function to the overall
1472 // function, we simply add it to the argument list. We use the same
1473 // value, it just needs to honor the new order of arguments.
1474 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1475 << *ArgumentValue << "\n");
1476 NewCallArgs.push_back(ArgumentValue);
1477 continue;
1478 }
1479
1480 // If it is a constant, we simply add it to the argument list as a value.
1481 if (Region.AggArgToConstant.contains(AggArgIdx)) {
1482 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1483 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1484 << *CST << "\n");
1485 NewCallArgs.push_back(CST);
1486 continue;
1487 }
1488
1489 // Add a nullptr value if the argument is not found in the extracted
1490 // function. If we cannot find a value, it means it is not in use
1491 // for the region, so we should not pass anything to it.
1492 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1493 NewCallArgs.push_back(ConstantPointerNull::get(
1494 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1495 }
1496
1497 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1498 << *AggFunc << " with new set of arguments\n");
1499 // Create the new call instruction and erase the old one.
1500 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1501 Call->getIterator());
1502
1503 // It is possible that the call to the outlined function is either the first
1504 // instruction is in the new block, the last instruction, or both. If either
1505 // of these is the case, we need to make sure that we replace the instruction
1506 // in the IRInstructionData struct with the new call.
1507 CallInst *OldCall = Region.Call;
1508 if (Region.NewFront->Inst == OldCall)
1509 Region.NewFront->Inst = Call;
1510 if (Region.NewBack->Inst == OldCall)
1511 Region.NewBack->Inst = Call;
1512
1513 // Transfer any debug information.
1514 Call->setDebugLoc(Region.Call->getDebugLoc());
1515 // Since our output may determine which branch we go to, we make sure to
1516 // propagate this new call value through the module.
1517 OldCall->replaceAllUsesWith(Call);
1518
1519 // Remove the old instruction.
1520 OldCall->eraseFromParent();
1521 Region.Call = Call;
1522
1523 // Make sure that the argument in the new function has the SwiftError
1524 // argument.
1525 if (Group.SwiftErrorArgument)
1526 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1527
1528 return Call;
1529}
1530
1531/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1532/// for \p RetVal.
1533///
1534/// \param Group - The OutlinableGroup containing the information about the
1535/// overall outlined function.
1536/// \param RetVal - The return value or exit option that we are currently
1537/// evaluating.
1538/// \returns The found or newly created BasicBlock to contain the needed
1539/// PHINodes to be used as outputs.
1542 ReturnBlockForRetVal;
1543 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1544 ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1545 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1546 "Could not find output value!");
1547 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1548
1549 // Find if a PHIBlock exists for this return value already. If it is
1550 // the first time we are analyzing this, we will not, so we record it.
1551 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1552 if (PhiBlockForRetVal != Group.PHIBlocks.end())
1553 return PhiBlockForRetVal->second;
1554
1555 // If we did not find a block, we create one, and insert it into the
1556 // overall function and record it.
1557 bool Inserted = false;
1558 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1559 ReturnBB->getParent());
1560 std::tie(PhiBlockForRetVal, Inserted) =
1561 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1562
1563 // We find the predecessors of the return block in the newly created outlined
1564 // function in order to point them to the new PHIBlock rather than the already
1565 // existing return block.
1566 SmallVector<BranchInst *, 2> BranchesToChange;
1567 for (BasicBlock *Pred : predecessors(ReturnBB))
1568 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1569
1570 // Now we mark the branch instructions found, and change the references of the
1571 // return block to the newly created PHIBlock.
1572 for (BranchInst *BI : BranchesToChange)
1573 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1574 if (BI->getSuccessor(Succ) != ReturnBB)
1575 continue;
1576 BI->setSuccessor(Succ, PHIBlock);
1577 }
1578
1579 BranchInst::Create(ReturnBB, PHIBlock);
1580
1581 return PhiBlockForRetVal->second;
1582}
1583
1584/// For the function call now representing the \p Region, find the passed value
1585/// to that call that represents Argument \p A at the call location if the
1586/// call has already been replaced with a call to the overall, aggregate
1587/// function.
1588///
1589/// \param A - The Argument to get the passed value for.
1590/// \param Region - The extracted Region corresponding to the outlined function.
1591/// \returns The Value representing \p A at the call site.
1592static Value *
1594 const OutlinableRegion &Region) {
1595 // If we don't need to adjust the argument number at all (since the call
1596 // has already been replaced by a call to the overall outlined function)
1597 // we can just get the specified argument.
1598 return Region.Call->getArgOperand(A->getArgNo());
1599}
1600
1601/// For the function call now representing the \p Region, find the passed value
1602/// to that call that represents Argument \p A at the call location if the
1603/// call has only been replaced by the call to the aggregate function.
1604///
1605/// \param A - The Argument to get the passed value for.
1606/// \param Region - The extracted Region corresponding to the outlined function.
1607/// \returns The Value representing \p A at the call site.
1608static Value *
1610 const OutlinableRegion &Region) {
1611 unsigned ArgNum = A->getArgNo();
1612
1613 // If it is a constant, we can look at our mapping from when we created
1614 // the outputs to figure out what the constant value is.
1615 if (Region.AggArgToConstant.count(ArgNum))
1616 return Region.AggArgToConstant.find(ArgNum)->second;
1617
1618 // If it is not a constant, and we are not looking at the overall function, we
1619 // need to adjust which argument we are looking at.
1620 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1621 return Region.Call->getArgOperand(ArgNum);
1622}
1623
1624/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1625///
1626/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1627/// \param Region [in] - The OutlinableRegion containing \p PN.
1628/// \param OutputMappings [in] - The mapping of output values from outlined
1629/// region to their original values.
1630/// \param CanonNums [out] - The canonical numbering for the incoming values to
1631/// \p PN paired with their incoming block.
1632/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1633/// of \p Region rather than the overall function's call.
1636 const DenseMap<Value *, Value *> &OutputMappings,
1637 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1638 bool ReplacedWithOutlinedCall = true) {
1639 // Iterate over the incoming values.
1640 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1641 Value *IVal = PN->getIncomingValue(Idx);
1642 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1643 // If we have an argument as incoming value, we need to grab the passed
1644 // value from the call itself.
1645 if (Argument *A = dyn_cast<Argument>(IVal)) {
1646 if (ReplacedWithOutlinedCall)
1648 else
1650 }
1651
1652 // Get the original value if it has been replaced by an output value.
1653 IVal = findOutputMapping(OutputMappings, IVal);
1654
1655 // Find and add the canonical number for the incoming value.
1656 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1657 assert(GVN && "No GVN for incoming value");
1658 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1659 assert(CanonNum && "No Canonical Number for GVN");
1660 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1661 }
1662}
1663
1664/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1665/// in order to condense the number of instructions added to the outlined
1666/// function.
1667///
1668/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1669/// \param Region [in] - The OutlinableRegion containing \p PN.
1670/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1671/// \p PN in.
1672/// \param OutputMappings [in] - The mapping of output values from outlined
1673/// region to their original values.
1674/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1675/// matched.
1676/// \return the newly found or created PHINode in \p OverallPhiBlock.
1677static PHINode*
1679 BasicBlock *OverallPhiBlock,
1680 const DenseMap<Value *, Value *> &OutputMappings,
1681 DenseSet<PHINode *> &UsedPHIs) {
1682 OutlinableGroup &Group = *Region.Parent;
1683
1684
1685 // A list of the canonical numbering assigned to each incoming value, paired
1686 // with the incoming block for the PHINode passed into this function.
1688
1689 // We have to use the extracted function since we have merged this region into
1690 // the overall function yet. We make sure to reassign the argument numbering
1691 // since it is possible that the argument ordering is different between the
1692 // functions.
1693 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1694 /* ReplacedWithOutlinedCall = */ false);
1695
1696 OutlinableRegion *FirstRegion = Group.Regions[0];
1697
1698 // A list of the canonical numbering assigned to each incoming value, paired
1699 // with the incoming block for the PHINode that we are currently comparing
1700 // the passed PHINode to.
1702
1703 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1704 // the uses of the PHINode we are searching for, with the found PHINode.
1705 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1706 // If this PHINode has already been matched to another PHINode to be merged,
1707 // we skip it.
1708 if (UsedPHIs.contains(&CurrPN))
1709 continue;
1710
1711 CurrentCanonNums.clear();
1712 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1713 /* ReplacedWithOutlinedCall = */ true);
1714
1715 // If the list of incoming values is not the same length, then they cannot
1716 // match since there is not an analogue for each incoming value.
1717 if (PNCanonNums.size() != CurrentCanonNums.size())
1718 continue;
1719
1720 bool FoundMatch = true;
1721
1722 // We compare the canonical value for each incoming value in the passed
1723 // in PHINode to one already present in the outlined region. If the
1724 // incoming values do not match, then the PHINodes do not match.
1725
1726 // We also check to make sure that the incoming block matches as well by
1727 // finding the corresponding incoming block in the combined outlined region
1728 // for the current outlined region.
1729 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1730 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1731 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1732 if (ToCompareTo.first != ToAdd.first) {
1733 FoundMatch = false;
1734 break;
1735 }
1736
1737 BasicBlock *CorrespondingBlock =
1738 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1739 assert(CorrespondingBlock && "Found block is nullptr");
1740 if (CorrespondingBlock != ToCompareTo.second) {
1741 FoundMatch = false;
1742 break;
1743 }
1744 }
1745
1746 // If all incoming values and branches matched, then we can merge
1747 // into the found PHINode.
1748 if (FoundMatch) {
1749 UsedPHIs.insert(&CurrPN);
1750 return &CurrPN;
1751 }
1752 }
1753
1754 // If we've made it here, it means we weren't able to replace the PHINode, so
1755 // we must insert it ourselves.
1756 PHINode *NewPN = cast<PHINode>(PN.clone());
1757 NewPN->insertBefore(&*OverallPhiBlock->begin());
1758 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1759 Idx++) {
1760 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1761 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1762
1763 // Find corresponding basic block in the overall function for the incoming
1764 // block.
1765 BasicBlock *BlockToUse =
1766 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1767 NewPN->setIncomingBlock(Idx, BlockToUse);
1768
1769 // If we have an argument we make sure we replace using the argument from
1770 // the correct function.
1771 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1772 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1773 NewPN->setIncomingValue(Idx, Val);
1774 continue;
1775 }
1776
1777 // Find the corresponding value in the overall function.
1778 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1779 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1780 assert(Val && "Value is nullptr?");
1782 FirstRegion->RemappedArguments.find(Val);
1783 if (RemappedIt != FirstRegion->RemappedArguments.end())
1784 Val = RemappedIt->second;
1785 NewPN->setIncomingValue(Idx, Val);
1786 }
1787 return NewPN;
1788}
1789
1790// Within an extracted function, replace the argument uses of the extracted
1791// region with the arguments of the function for an OutlinableGroup.
1792//
1793/// \param [in] Region - The region of extracted code to be changed.
1794/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1795/// region.
1796/// \param [in] FirstFunction - A flag to indicate whether we are using this
1797/// function to define the overall outlined function for all the regions, or
1798/// if we are operating on one of the following regions.
1799static void
1802 const DenseMap<Value *, Value *> &OutputMappings,
1803 bool FirstFunction = false) {
1804 OutlinableGroup &Group = *Region.Parent;
1805 assert(Region.ExtractedFunction && "Region has no extracted function?");
1806
1807 Function *DominatingFunction = Region.ExtractedFunction;
1808 if (FirstFunction)
1809 DominatingFunction = Group.OutlinedFunction;
1810 DominatorTree DT(*DominatingFunction);
1811 DenseSet<PHINode *> UsedPHIs;
1812
1813 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1814 ArgIdx++) {
1815 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1816 "No mapping from extracted to outlined?");
1817 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1818 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1819 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1820 // The argument is an input, so we can simply replace it with the overall
1821 // argument value
1822 if (ArgIdx < Region.NumExtractedInputs) {
1823 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1824 << *Region.ExtractedFunction << " with " << *AggArg
1825 << " in function " << *Group.OutlinedFunction << "\n");
1826 Arg->replaceAllUsesWith(AggArg);
1827 Value *V = Region.Call->getArgOperand(ArgIdx);
1828 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1829 continue;
1830 }
1831
1832 // If we are replacing an output, we place the store value in its own
1833 // block inside the overall function before replacing the use of the output
1834 // in the function.
1835 assert(Arg->hasOneUse() && "Output argument can only have one use");
1836 User *InstAsUser = Arg->user_back();
1837 assert(InstAsUser && "User is nullptr!");
1838
1839 Instruction *I = cast<Instruction>(InstAsUser);
1840 BasicBlock *BB = I->getParent();
1841 SmallVector<BasicBlock *, 4> Descendants;
1842 DT.getDescendants(BB, Descendants);
1843 bool EdgeAdded = false;
1844 if (Descendants.size() == 0) {
1845 EdgeAdded = true;
1846 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1847 DT.getDescendants(BB, Descendants);
1848 }
1849
1850 // Iterate over the following blocks, looking for return instructions,
1851 // if we find one, find the corresponding output block for the return value
1852 // and move our store instruction there.
1853 for (BasicBlock *DescendBB : Descendants) {
1854 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1855 if (!RI)
1856 continue;
1857 Value *RetVal = RI->getReturnValue();
1858 auto VBBIt = OutputBBs.find(RetVal);
1859 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1860
1861 // If this is storing a PHINode, we must make sure it is included in the
1862 // overall function.
1863 StoreInst *SI = cast<StoreInst>(I);
1864
1865 Value *ValueOperand = SI->getValueOperand();
1866
1867 StoreInst *NewI = cast<StoreInst>(I->clone());
1868 NewI->setDebugLoc(DebugLoc());
1869 BasicBlock *OutputBB = VBBIt->second;
1870 NewI->insertInto(OutputBB, OutputBB->end());
1871 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1872 << *OutputBB << "\n");
1873
1874 // If this is storing a PHINode, we must make sure it is included in the
1875 // overall function.
1876 if (!isa<PHINode>(ValueOperand) ||
1877 Region.Candidate->getGVN(ValueOperand).has_value()) {
1878 if (FirstFunction)
1879 continue;
1880 Value *CorrVal =
1881 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1882 assert(CorrVal && "Value is nullptr?");
1883 NewI->setOperand(0, CorrVal);
1884 continue;
1885 }
1886 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1887 // If it has a value, it was not split by the code extractor, which
1888 // is what we are looking for.
1889 if (Region.Candidate->getGVN(PN))
1890 continue;
1891
1892 // We record the parent block for the PHINode in the Region so that
1893 // we can exclude it from checks later on.
1894 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1895
1896 // If this is the first function, we do not need to worry about mergiing
1897 // this with any other block in the overall outlined function, so we can
1898 // just continue.
1899 if (FirstFunction) {
1900 BasicBlock *PHIBlock = PN->getParent();
1901 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1902 continue;
1903 }
1904
1905 // We look for the aggregate block that contains the PHINodes leading into
1906 // this exit path. If we can't find one, we create one.
1907 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1908
1909 // For our PHINode, we find the combined canonical numbering, and
1910 // attempt to find a matching PHINode in the overall PHIBlock. If we
1911 // cannot, we copy the PHINode and move it into this new block.
1912 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1913 OutputMappings, UsedPHIs);
1914 NewI->setOperand(0, NewPN);
1915 }
1916
1917 // If we added an edge for basic blocks without a predecessor, we remove it
1918 // here.
1919 if (EdgeAdded)
1920 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1921 I->eraseFromParent();
1922
1923 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1924 << *Region.ExtractedFunction << " with " << *AggArg
1925 << " in function " << *Group.OutlinedFunction << "\n");
1926 Arg->replaceAllUsesWith(AggArg);
1927 }
1928}
1929
1930/// Within an extracted function, replace the constants that need to be lifted
1931/// into arguments with the actual argument.
1932///
1933/// \param Region [in] - The region of extracted code to be changed.
1935 OutlinableGroup &Group = *Region.Parent;
1936 // Iterate over the constants that need to be elevated into arguments
1937 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1938 unsigned AggArgIdx = Const.first;
1939 Function *OutlinedFunction = Group.OutlinedFunction;
1940 assert(OutlinedFunction && "Overall Function is not defined?");
1941 Constant *CST = Const.second;
1942 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1943 // Identify the argument it will be elevated to, and replace instances of
1944 // that constant in the function.
1945
1946 // TODO: If in the future constants do not have one global value number,
1947 // i.e. a constant 1 could be mapped to several values, this check will
1948 // have to be more strict. It cannot be using only replaceUsesWithIf.
1949
1950 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1951 << " in function " << *OutlinedFunction << " with "
1952 << *Arg << "\n");
1953 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1954 if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1955 return I->getFunction() == OutlinedFunction;
1956 return false;
1957 });
1958 }
1959}
1960
1961/// It is possible that there is a basic block that already performs the same
1962/// stores. This returns a duplicate block, if it exists
1963///
1964/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1965/// \param OutputStoreBBs [in] The existing output blocks.
1966/// \returns an optional value with the number output block if there is a match.
1967std::optional<unsigned> findDuplicateOutputBlock(
1969 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1970
1971 bool Mismatch = false;
1972 unsigned MatchingNum = 0;
1973 // We compare the new set output blocks to the other sets of output blocks.
1974 // If they are the same number, and have identical instructions, they are
1975 // considered to be the same.
1976 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1977 Mismatch = false;
1978 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1980 OutputBBs.find(VToB.first);
1981 if (OutputBBIt == OutputBBs.end()) {
1982 Mismatch = true;
1983 break;
1984 }
1985
1986 BasicBlock *CompBB = VToB.second;
1987 BasicBlock *OutputBB = OutputBBIt->second;
1988 if (CompBB->size() - 1 != OutputBB->size()) {
1989 Mismatch = true;
1990 break;
1991 }
1992
1993 BasicBlock::iterator NIt = OutputBB->begin();
1994 for (Instruction &I : *CompBB) {
1995 if (isa<BranchInst>(&I))
1996 continue;
1997
1998 if (!I.isIdenticalTo(&(*NIt))) {
1999 Mismatch = true;
2000 break;
2001 }
2002
2003 NIt++;
2004 }
2005 }
2006
2007 if (!Mismatch)
2008 return MatchingNum;
2009
2010 MatchingNum++;
2011 }
2012
2013 return std::nullopt;
2014}
2015
2016/// Remove empty output blocks from the outlined region.
2017///
2018/// \param BlocksToPrune - Mapping of return values output blocks for the \p
2019/// Region.
2020/// \param Region - The OutlinableRegion we are analyzing.
2021static bool
2024 bool AllRemoved = true;
2025 Value *RetValueForBB;
2026 BasicBlock *NewBB;
2028 // Iterate over the output blocks created in the outlined section.
2029 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2030 RetValueForBB = VtoBB.first;
2031 NewBB = VtoBB.second;
2032
2033 // If there are no instructions, we remove it from the module, and also
2034 // mark the value for removal from the return value to output block mapping.
2035 if (NewBB->size() == 0) {
2036 NewBB->eraseFromParent();
2037 ToRemove.push_back(RetValueForBB);
2038 continue;
2039 }
2040
2041 // Mark that we could not remove all the blocks since they were not all
2042 // empty.
2043 AllRemoved = false;
2044 }
2045
2046 // Remove the return value from the mapping.
2047 for (Value *V : ToRemove)
2048 BlocksToPrune.erase(V);
2049
2050 // Mark the region as having the no output scheme.
2051 if (AllRemoved)
2052 Region.OutputBlockNum = -1;
2053
2054 return AllRemoved;
2055}
2056
2057/// For the outlined section, move needed the StoreInsts for the output
2058/// registers into their own block. Then, determine if there is a duplicate
2059/// output block already created.
2060///
2061/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2062/// \param [in] Region - The OutlinableRegion that is being analyzed.
2063/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2064/// placed in.
2065/// \param [in] EndBBs - the final blocks of the extracted function.
2066/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2067/// been replaced by a new output value.
2068/// \param [in,out] OutputStoreBBs - The existing output blocks.
2073 const DenseMap<Value *, Value *> &OutputMappings,
2074 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2075 // If none of the output blocks have any instructions, this means that we do
2076 // not have to determine if it matches any of the other output schemes, and we
2077 // don't have to do anything else.
2078 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2079 return;
2080
2081 // Determine is there is a duplicate set of blocks.
2082 std::optional<unsigned> MatchingBB =
2083 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2084
2085 // If there is, we remove the new output blocks. If it does not,
2086 // we add it to our list of sets of output blocks.
2087 if (MatchingBB) {
2088 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2089 << Region.ExtractedFunction << " to " << *MatchingBB);
2090
2091 Region.OutputBlockNum = *MatchingBB;
2092 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2093 VtoBB.second->eraseFromParent();
2094 return;
2095 }
2096
2097 Region.OutputBlockNum = OutputStoreBBs.size();
2098
2099 Value *RetValueForBB;
2100 BasicBlock *NewBB;
2101 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2102 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2103 RetValueForBB = VtoBB.first;
2104 NewBB = VtoBB.second;
2106 EndBBs.find(RetValueForBB);
2107 LLVM_DEBUG(dbgs() << "Create output block for region in"
2108 << Region.ExtractedFunction << " to "
2109 << *NewBB);
2110 BranchInst::Create(VBBIt->second, NewBB);
2111 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2112 }
2113}
2114
2115/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2116/// before creating a basic block for each \p NewMap, and inserting into the new
2117/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2118///
2119/// \param OldMap [in] - The mapping to base the new mapping off of.
2120/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2121/// \param ParentFunc [in] - The function to put the new basic block in.
2122/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2123/// by an index value.
2126 Function *ParentFunc, Twine BaseName) {
2127 unsigned Idx = 0;
2128 std::vector<Value *> SortedKeys;
2129
2130 getSortedConstantKeys(SortedKeys, OldMap);
2131
2132 for (Value *RetVal : SortedKeys) {
2134 ParentFunc->getContext(),
2135 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2136 ParentFunc);
2137 NewMap.insert(std::make_pair(RetVal, NewBB));
2138 }
2139}
2140
2141/// Create the switch statement for outlined function to differentiate between
2142/// all the output blocks.
2143///
2144/// For the outlined section, determine if an outlined block already exists that
2145/// matches the needed stores for the extracted section.
2146/// \param [in] M - The module we are outlining from.
2147/// \param [in] OG - The group of regions to be outlined.
2148/// \param [in] EndBBs - The final blocks of the extracted function.
2149/// \param [in,out] OutputStoreBBs - The existing output blocks.
2152 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2153 // We only need the switch statement if there is more than one store
2154 // combination, or there is more than one set of output blocks. The first
2155 // will occur when we store different sets of values for two different
2156 // regions. The second will occur when we have two outputs that are combined
2157 // in a PHINode outside of the region in one outlined instance, and are used
2158 // seaparately in another. This will create the same set of OutputGVNs, but
2159 // will generate two different output schemes.
2160 if (OG.OutputGVNCombinations.size() > 1) {
2161 Function *AggFunc = OG.OutlinedFunction;
2162 // Create a final block for each different return block.
2164 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2165
2166 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2167 std::pair<Value *, BasicBlock *> &OutputBlock =
2168 *OG.EndBBs.find(RetBlockPair.first);
2169 BasicBlock *ReturnBlock = RetBlockPair.second;
2170 BasicBlock *EndBB = OutputBlock.second;
2171 Instruction *Term = EndBB->getTerminator();
2172 // Move the return value to the final block instead of the original exit
2173 // stub.
2174 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2175 // Put the switch statement in the old end basic block for the function
2176 // with a fall through to the new return block.
2177 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2178 << OutputStoreBBs.size() << "\n");
2179 SwitchInst *SwitchI =
2180 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2181 ReturnBlock, OutputStoreBBs.size(), EndBB);
2182
2183 unsigned Idx = 0;
2184 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2186 OutputStoreBB.find(OutputBlock.first);
2187
2188 if (OSBBIt == OutputStoreBB.end())
2189 continue;
2190
2191 BasicBlock *BB = OSBBIt->second;
2192 SwitchI->addCase(
2193 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2194 Term = BB->getTerminator();
2195 Term->setSuccessor(0, ReturnBlock);
2196 Idx++;
2197 }
2198 }
2199 return;
2200 }
2201
2202 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2203
2204 // If there needs to be stores, move them from the output blocks to their
2205 // corresponding ending block. We do not check that the OutputGVNCombinations
2206 // is equal to 1 here since that could just been the case where there are 0
2207 // outputs. Instead, we check whether there is more than one set of output
2208 // blocks since this is the only case where we would have to move the
2209 // stores, and erase the extraneous blocks.
2210 if (OutputStoreBBs.size() == 1) {
2211 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2212 << *OG.OutlinedFunction << "\n");
2213 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2214 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2216 EndBBs.find(VBPair.first);
2217 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2218 BasicBlock *EndBB = EndBBIt->second;
2219 BasicBlock *OutputBB = VBPair.second;
2220 Instruction *Term = OutputBB->getTerminator();
2221 Term->eraseFromParent();
2222 Term = EndBB->getTerminator();
2223 moveBBContents(*OutputBB, *EndBB);
2224 Term->moveBefore(*EndBB, EndBB->end());
2225 OutputBB->eraseFromParent();
2226 }
2227 }
2228}
2229
2230/// Fill the new function that will serve as the replacement function for all of
2231/// the extracted regions of a certain structure from the first region in the
2232/// list of regions. Replace this first region's extracted function with the
2233/// new overall function.
2234///
2235/// \param [in] M - The module we are outlining from.
2236/// \param [in] CurrentGroup - The group of regions to be outlined.
2237/// \param [in,out] OutputStoreBBs - The output blocks for each different
2238/// set of stores needed for the different functions.
2239/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2240/// once outlining is complete.
2241/// \param [in] OutputMappings - Extracted functions to erase from module
2242/// once outlining is complete.
2244 Module &M, OutlinableGroup &CurrentGroup,
2245 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2246 std::vector<Function *> &FuncsToRemove,
2247 const DenseMap<Value *, Value *> &OutputMappings) {
2248 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2249
2250 // Move first extracted function's instructions into new function.
2251 LLVM_DEBUG(dbgs() << "Move instructions from "
2252 << *CurrentOS->ExtractedFunction << " to instruction "
2253 << *CurrentGroup.OutlinedFunction << "\n");
2255 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2256
2257 // Transfer the attributes from the function to the new function.
2258 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2259 CurrentGroup.OutlinedFunction->addFnAttr(A);
2260
2261 // Create a new set of output blocks for the first extracted function.
2263 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2264 CurrentGroup.OutlinedFunction, "output_block_0");
2265 CurrentOS->OutputBlockNum = 0;
2266
2267 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2268 replaceConstants(*CurrentOS);
2269
2270 // We first identify if any output blocks are empty, if they are we remove
2271 // them. We then create a branch instruction to the basic block to the return
2272 // block for the function for each non empty output block.
2273 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2274 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2275 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2277 CurrentGroup.EndBBs.find(VToBB.first);
2278 BasicBlock *EndBB = VBBIt->second;
2279 BranchInst::Create(EndBB, VToBB.second);
2280 OutputStoreBBs.back().insert(VToBB);
2281 }
2282 }
2283
2284 // Replace the call to the extracted function with the outlined function.
2285 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2286
2287 // We only delete the extracted functions at the end since we may need to
2288 // reference instructions contained in them for mapping purposes.
2289 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2290}
2291
2292void IROutliner::deduplicateExtractedSections(
2293 Module &M, OutlinableGroup &CurrentGroup,
2294 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2295 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2296
2297 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2298
2299 OutlinableRegion *CurrentOS;
2300
2301 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2302 OutputMappings);
2303
2304 std::vector<Value *> SortedKeys;
2305 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2306 CurrentOS = CurrentGroup.Regions[Idx];
2308 *CurrentOS->ExtractedFunction);
2309
2310 // Create a set of BasicBlocks, one for each return block, to hold the
2311 // needed store instructions.
2314 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2315 "output_block_" + Twine(static_cast<unsigned>(Idx)));
2316 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2317 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2318 CurrentGroup.EndBBs, OutputMappings,
2319 OutputStoreBBs);
2320
2321 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2322 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2323 }
2324
2325 // Create a switch statement to handle the different output schemes.
2326 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2327
2328 OutlinedFunctionNum++;
2329}
2330
2331/// Checks that the next instruction in the InstructionDataList matches the
2332/// next instruction in the module. If they do not, there could be the
2333/// possibility that extra code has been inserted, and we must ignore it.
2334///
2335/// \param ID - The IRInstructionData to check the next instruction of.
2336/// \returns true if the InstructionDataList and actual instruction match.
2338 // We check if there is a discrepancy between the InstructionDataList
2339 // and the actual next instruction in the module. If there is, it means
2340 // that an extra instruction was added, likely by the CodeExtractor.
2341
2342 // Since we do not have any similarity data about this particular
2343 // instruction, we cannot confidently outline it, and must discard this
2344 // candidate.
2345 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2346 Instruction *NextIDLInst = NextIDIt->Inst;
2347 Instruction *NextModuleInst = nullptr;
2348 if (!ID.Inst->isTerminator())
2349 NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2350 else if (NextIDLInst != nullptr)
2351 NextModuleInst =
2352 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2353
2354 if (NextIDLInst && NextIDLInst != NextModuleInst)
2355 return false;
2356
2357 return true;
2358}
2359
2360bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2361 const OutlinableRegion &Region) {
2362 IRSimilarityCandidate *IRSC = Region.Candidate;
2363 unsigned StartIdx = IRSC->getStartIdx();
2364 unsigned EndIdx = IRSC->getEndIdx();
2365
2366 // A check to make sure that we are not about to attempt to outline something
2367 // that has already been outlined.
2368 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2369 if (Outlined.contains(Idx))
2370 return false;
2371
2372 // We check if the recorded instruction matches the actual next instruction,
2373 // if it does not, we fix it in the InstructionDataList.
2374 if (!Region.Candidate->backInstruction()->isTerminator()) {
2375 Instruction *NewEndInst =
2376 Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2377 assert(NewEndInst && "Next instruction is a nullptr?");
2378 if (Region.Candidate->end()->Inst != NewEndInst) {
2379 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2380 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2381 IRInstructionData(*NewEndInst,
2382 InstructionClassifier.visit(*NewEndInst), *IDL);
2383
2384 // Insert the first IRInstructionData of the new region after the
2385 // last IRInstructionData of the IRSimilarityCandidate.
2386 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2387 }
2388 }
2389
2390 return none_of(*IRSC, [this](IRInstructionData &ID) {
2392 return true;
2393
2394 return !this->InstructionClassifier.visit(ID.Inst);
2395 });
2396}
2397
2398void IROutliner::pruneIncompatibleRegions(
2399 std::vector<IRSimilarityCandidate> &CandidateVec,
2400 OutlinableGroup &CurrentGroup) {
2401 bool PreviouslyOutlined;
2402
2403 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2404 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2405 const IRSimilarityCandidate &RHS) {
2406 return LHS.getStartIdx() < RHS.getStartIdx();
2407 });
2408
2409 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2410 // Since outlining a call and a branch instruction will be the same as only
2411 // outlinining a call instruction, we ignore it as a space saving.
2412 if (FirstCandidate.getLength() == 2) {
2413 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2414 isa<BranchInst>(FirstCandidate.back()->Inst))
2415 return;
2416 }
2417
2418 unsigned CurrentEndIdx = 0;
2419 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2420 PreviouslyOutlined = false;
2421 unsigned StartIdx = IRSC.getStartIdx();
2422 unsigned EndIdx = IRSC.getEndIdx();
2423 const Function &FnForCurrCand = *IRSC.getFunction();
2424
2425 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2426 if (Outlined.contains(Idx)) {
2427 PreviouslyOutlined = true;
2428 break;
2429 }
2430
2431 if (PreviouslyOutlined)
2432 continue;
2433
2434 // Check over the instructions, and if the basic block has its address
2435 // taken for use somewhere else, we do not outline that block.
2436 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2437 return ID.Inst->getParent()->hasAddressTaken();
2438 });
2439
2440 if (BBHasAddressTaken)
2441 continue;
2442
2443 if (FnForCurrCand.hasOptNone())
2444 continue;
2445
2446 if (FnForCurrCand.hasFnAttribute("nooutline")) {
2447 LLVM_DEBUG({
2448 dbgs() << "... Skipping function with nooutline attribute: "
2449 << FnForCurrCand.getName() << "\n";
2450 });
2451 continue;
2452 }
2453
2454 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2455 !OutlineFromLinkODRs)
2456 continue;
2457
2458 // Greedily prune out any regions that will overlap with already chosen
2459 // regions.
2460 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2461 continue;
2462
2463 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2465 return true;
2466
2467 return !this->InstructionClassifier.visit(ID.Inst);
2468 });
2469
2470 if (BadInst)
2471 continue;
2472
2473 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2474 OutlinableRegion(IRSC, CurrentGroup);
2475 CurrentGroup.Regions.push_back(OS);
2476
2477 CurrentEndIdx = EndIdx;
2478 }
2479}
2480
2482IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2483 InstructionCost RegionBenefit = 0;
2484 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2485 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2486 // We add the number of instructions in the region to the benefit as an
2487 // estimate as to how much will be removed.
2488 RegionBenefit += Region->getBenefit(TTI);
2489 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2490 << " saved instructions to overfall benefit.\n");
2491 }
2492
2493 return RegionBenefit;
2494}
2495
2496/// For the \p OutputCanon number passed in find the value represented by this
2497/// canonical number. If it is from a PHINode, we pick the first incoming
2498/// value and return that Value instead.
2499///
2500/// \param Region - The OutlinableRegion to get the Value from.
2501/// \param OutputCanon - The canonical number to find the Value from.
2502/// \returns The Value represented by a canonical number \p OutputCanon in \p
2503/// Region.
2505 unsigned OutputCanon) {
2506 OutlinableGroup &CurrentGroup = *Region.Parent;
2507 // If the value is greater than the value in the tracker, we have a
2508 // PHINode and will instead use one of the incoming values to find the
2509 // type.
2510 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2511 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2512 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2513 "Could not find GVN set for PHINode number!");
2514 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2515 OutputCanon = *It->second.second.begin();
2516 }
2517 std::optional<unsigned> OGVN =
2518 Region.Candidate->fromCanonicalNum(OutputCanon);
2519 assert(OGVN && "Could not find GVN for Canonical Number?");
2520 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2521 assert(OV && "Could not find value for GVN?");
2522 return *OV;
2523}
2524
2526IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2527 InstructionCost OverallCost = 0;
2528 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2529 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2530
2531 // Each output incurs a load after the call, so we add that to the cost.
2532 for (unsigned OutputCanon : Region->GVNStores) {
2533 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2534 InstructionCost LoadCost =
2535 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2537
2538 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2539 << " instructions to cost for output of type "
2540 << *V->getType() << "\n");
2541 OverallCost += LoadCost;
2542 }
2543 }
2544
2545 return OverallCost;
2546}
2547
2548/// Find the extra instructions needed to handle any output values for the
2549/// region.
2550///
2551/// \param [in] M - The Module to outline from.
2552/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2553/// \param [in] TTI - The TargetTransformInfo used to collect information for
2554/// new instruction costs.
2555/// \returns the additional cost to handle the outputs.
2557 OutlinableGroup &CurrentGroup,
2559 InstructionCost OutputCost = 0;
2560 unsigned NumOutputBranches = 0;
2561
2562 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2563 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2564 DenseSet<BasicBlock *> CandidateBlocks;
2565 Candidate.getBasicBlocks(CandidateBlocks);
2566
2567 // Count the number of different output branches that point to blocks outside
2568 // of the region.
2569 DenseSet<BasicBlock *> FoundBlocks;
2570 for (IRInstructionData &ID : Candidate) {
2571 if (!isa<BranchInst>(ID.Inst))
2572 continue;
2573
2574 for (Value *V : ID.OperVals) {
2575 BasicBlock *BB = static_cast<BasicBlock *>(V);
2576 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2577 NumOutputBranches++;
2578 }
2579 }
2580
2581 CurrentGroup.BranchesToOutside = NumOutputBranches;
2582
2583 for (const ArrayRef<unsigned> &OutputUse :
2584 CurrentGroup.OutputGVNCombinations) {
2585 for (unsigned OutputCanon : OutputUse) {
2586 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2587 InstructionCost StoreCost =
2588 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2590
2591 // An instruction cost is added for each store set that needs to occur for
2592 // various output combinations inside the function, plus a branch to
2593 // return to the exit block.
2594 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2595 << " instructions to cost for output of type "
2596 << *V->getType() << "\n");
2597 OutputCost += StoreCost * NumOutputBranches;
2598 }
2599
2600 InstructionCost BranchCost =
2602 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2603 << " a branch instruction\n");
2604 OutputCost += BranchCost * NumOutputBranches;
2605 }
2606
2607 // If there is more than one output scheme, we must have a comparison and
2608 // branch for each different item in the switch statement.
2609 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2610 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2611 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2614 InstructionCost BranchCost =
2616
2617 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2618 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2619
2620 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2621 << " instructions for each switch case for each different"
2622 << " output path in a function\n");
2623 OutputCost += TotalCost * NumOutputBranches;
2624 }
2625
2626 return OutputCost;
2627}
2628
2629void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2630 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2631 CurrentGroup.Benefit += RegionBenefit;
2632 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2633
2634 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2635 CurrentGroup.Cost += OutputReloadCost;
2636 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2637
2638 InstructionCost AverageRegionBenefit =
2639 RegionBenefit / CurrentGroup.Regions.size();
2640 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2641 unsigned NumRegions = CurrentGroup.Regions.size();
2643 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2644
2645 // We add one region to the cost once, to account for the instructions added
2646 // inside of the newly created function.
2647 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2648 << " instructions to cost for body of new function.\n");
2649 CurrentGroup.Cost += AverageRegionBenefit;
2650 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2651
2652 // For each argument, we must add an instruction for loading the argument
2653 // out of the register and into a value inside of the newly outlined function.
2654 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2655 << " instructions to cost for each argument in the new"
2656 << " function.\n");
2657 CurrentGroup.Cost +=
2658 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2659 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2660
2661 // Each argument needs to either be loaded into a register or onto the stack.
2662 // Some arguments will only be loaded into the stack once the argument
2663 // registers are filled.
2664 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2665 << " instructions to cost for each argument in the new"
2666 << " function " << NumRegions << " times for the "
2667 << "needed argument handling at the call site.\n");
2668 CurrentGroup.Cost +=
2669 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2670 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2671
2672 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2673 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2674}
2675
2676void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2677 ArrayRef<Value *> Outputs,
2678 LoadInst *LI) {
2679 // For and load instructions following the call
2680 Value *Operand = LI->getPointerOperand();
2681 std::optional<unsigned> OutputIdx;
2682 // Find if the operand it is an output register.
2683 for (unsigned ArgIdx = Region.NumExtractedInputs;
2684 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2685 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2686 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2687 break;
2688 }
2689 }
2690
2691 // If we found an output register, place a mapping of the new value
2692 // to the original in the mapping.
2693 if (!OutputIdx)
2694 return;
2695
2696 if (!OutputMappings.contains(Outputs[*OutputIdx])) {
2697 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2698 << *Outputs[*OutputIdx] << "\n");
2699 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2700 } else {
2701 Value *Orig = OutputMappings.find(Outputs[*OutputIdx])->second;
2702 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2703 << *Outputs[*OutputIdx] << "\n");
2704 OutputMappings.insert(std::make_pair(LI, Orig));
2705 }
2706}
2707
2708bool IROutliner::extractSection(OutlinableRegion &Region) {
2709 SetVector<Value *> ArgInputs, Outputs, SinkCands;
2710 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2711 BasicBlock *InitialStart = Region.StartBB;
2712 Function *OrigF = Region.StartBB->getParent();
2713 CodeExtractorAnalysisCache CEAC(*OrigF);
2714 Region.ExtractedFunction =
2715 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2716
2717 // If the extraction was successful, find the BasicBlock, and reassign the
2718 // OutlinableRegion blocks
2719 if (!Region.ExtractedFunction) {
2720 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2721 << "\n");
2722 Region.reattachCandidate();
2723 return false;
2724 }
2725
2726 // Get the block containing the called branch, and reassign the blocks as
2727 // necessary. If the original block still exists, it is because we ended on
2728 // a branch instruction, and so we move the contents into the block before
2729 // and assign the previous block correctly.
2730 User *InstAsUser = Region.ExtractedFunction->user_back();
2731 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2732 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2733 assert(Region.PrevBB && "PrevBB is nullptr?");
2734 if (Region.PrevBB == InitialStart) {
2735 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2736 Instruction *BI = NewPrev->getTerminator();
2737 BI->eraseFromParent();
2738 moveBBContents(*InitialStart, *NewPrev);
2739 Region.PrevBB = NewPrev;
2740 InitialStart->eraseFromParent();
2741 }
2742
2743 Region.StartBB = RewrittenBB;
2744 Region.EndBB = RewrittenBB;
2745
2746 // The sequences of outlinable regions has now changed. We must fix the
2747 // IRInstructionDataList for consistency. Although they may not be illegal
2748 // instructions, they should not be compared with anything else as they
2749 // should not be outlined in this round. So marking these as illegal is
2750 // allowed.
2751 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2752 Instruction *BeginRewritten = &*RewrittenBB->begin();
2753 Instruction *EndRewritten = &*RewrittenBB->begin();
2754 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2755 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2756 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2757 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2758
2759 // Insert the first IRInstructionData of the new region in front of the
2760 // first IRInstructionData of the IRSimilarityCandidate.
2761 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2762 // Insert the first IRInstructionData of the new region after the
2763 // last IRInstructionData of the IRSimilarityCandidate.
2764 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2765 // Remove the IRInstructionData from the IRSimilarityCandidate.
2766 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2767
2768 assert(RewrittenBB != nullptr &&
2769 "Could not find a predecessor after extraction!");
2770
2771 // Iterate over the new set of instructions to find the new call
2772 // instruction.
2773 for (Instruction &I : *RewrittenBB)
2774 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2775 if (Region.ExtractedFunction == CI->getCalledFunction())
2776 Region.Call = CI;
2777 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2778 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2779 Region.reattachCandidate();
2780 return true;
2781}
2782
2783unsigned IROutliner::doOutline(Module &M) {
2784 // Find the possible similarity sections.
2785 InstructionClassifier.EnableBranches = !DisableBranches;
2786 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2787 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2788
2789 IRSimilarityIdentifier &Identifier = getIRSI(M);
2790 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2791
2792 // Sort them by size of extracted sections
2793 unsigned OutlinedFunctionNum = 0;
2794 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2795 // to sort them by the potential number of instructions to be outlined
2796 if (SimilarityCandidates.size() > 1)
2797 llvm::stable_sort(SimilarityCandidates,
2798 [](const std::vector<IRSimilarityCandidate> &LHS,
2799 const std::vector<IRSimilarityCandidate> &RHS) {
2800 return LHS[0].getLength() * LHS.size() >
2801 RHS[0].getLength() * RHS.size();
2802 });
2803 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2804 // each of the following for loops to avoid making an allocator.
2805 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2806
2807 DenseSet<unsigned> NotSame;
2808 std::vector<OutlinableGroup *> NegativeCostGroups;
2809 std::vector<OutlinableRegion *> OutlinedRegions;
2810 // Iterate over the possible sets of similarity.
2811 unsigned PotentialGroupIdx = 0;
2812 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2813 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2814
2815 // Remove entries that were previously outlined
2816 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2817
2818 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2819 // trying to outlined since there is no compatible similar instance of this
2820 // code.
2821 if (CurrentGroup.Regions.size() < 2)
2822 continue;
2823
2824 // Determine if there are any values that are the same constant throughout
2825 // each section in the set.
2826 NotSame.clear();
2827 CurrentGroup.findSameConstants(NotSame);
2828
2829 if (CurrentGroup.IgnoreGroup)
2830 continue;
2831
2832 // Create a CodeExtractor for each outlinable region. Identify inputs and
2833 // outputs for each section using the code extractor and create the argument
2834 // types for the Aggregate Outlining Function.
2835 OutlinedRegions.clear();
2836 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2837 // Break the outlinable region out of its parent BasicBlock into its own
2838 // BasicBlocks (see function implementation).
2839 OS->splitCandidate();
2840
2841 // There's a chance that when the region is split, extra instructions are
2842 // added to the region. This makes the region no longer viable
2843 // to be split, so we ignore it for outlining.
2844 if (!OS->CandidateSplit)
2845 continue;
2846
2848 DenseSet<BasicBlock *> BlocksInRegion;
2849 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2850 OS->CE = new (ExtractorAllocator.Allocate())
2851 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2852 false, nullptr, "outlined");
2853 findAddInputsOutputs(M, *OS, NotSame);
2854 if (!OS->IgnoreRegion)
2855 OutlinedRegions.push_back(OS);
2856
2857 // We recombine the blocks together now that we have gathered all the
2858 // needed information.
2859 OS->reattachCandidate();
2860 }
2861
2862 CurrentGroup.Regions = std::move(OutlinedRegions);
2863
2864 if (CurrentGroup.Regions.empty())
2865 continue;
2866
2867 CurrentGroup.collectGVNStoreSets(M);
2868
2869 if (CostModel)
2870 findCostBenefit(M, CurrentGroup);
2871
2872 // If we are adhering to the cost model, skip those groups where the cost
2873 // outweighs the benefits.
2874 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2876 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2877 ORE.emit([&]() {
2878 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2879 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2880 C->frontInstruction());
2881 R << "did not outline "
2882 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2883 << " regions due to estimated increase of "
2884 << ore::NV("InstructionIncrease",
2885 CurrentGroup.Cost - CurrentGroup.Benefit)
2886 << " instructions at locations ";
2887 interleave(
2888 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2889 [&R](OutlinableRegion *Region) {
2890 R << ore::NV(
2891 "DebugLoc",
2892 Region->Candidate->frontInstruction()->getDebugLoc());
2893 },
2894 [&R]() { R << " "; });
2895 return R;
2896 });
2897 continue;
2898 }
2899
2900 NegativeCostGroups.push_back(&CurrentGroup);
2901 }
2902
2903 ExtractorAllocator.DestroyAll();
2904
2905 if (NegativeCostGroups.size() > 1)
2906 stable_sort(NegativeCostGroups,
2907 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2908 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2909 });
2910
2911 std::vector<Function *> FuncsToRemove;
2912 for (OutlinableGroup *CG : NegativeCostGroups) {
2913 OutlinableGroup &CurrentGroup = *CG;
2914
2915 OutlinedRegions.clear();
2916 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2917 // We check whether our region is compatible with what has already been
2918 // outlined, and whether we need to ignore this item.
2919 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2920 continue;
2921 OutlinedRegions.push_back(Region);
2922 }
2923
2924 if (OutlinedRegions.size() < 2)
2925 continue;
2926
2927 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2928 // we are still outlining enough regions to make up for the added cost.
2929 CurrentGroup.Regions = std::move(OutlinedRegions);
2930 if (CostModel) {
2931 CurrentGroup.Benefit = 0;
2932 CurrentGroup.Cost = 0;
2933 findCostBenefit(M, CurrentGroup);
2934 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2935 continue;
2936 }
2937 OutlinedRegions.clear();
2938 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2939 Region->splitCandidate();
2940 if (!Region->CandidateSplit)
2941 continue;
2942 OutlinedRegions.push_back(Region);
2943 }
2944
2945 CurrentGroup.Regions = std::move(OutlinedRegions);
2946 if (CurrentGroup.Regions.size() < 2) {
2947 for (OutlinableRegion *R : CurrentGroup.Regions)
2948 R->reattachCandidate();
2949 continue;
2950 }
2951
2952 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2953 << " and benefit " << CurrentGroup.Benefit << "\n");
2954
2955 // Create functions out of all the sections, and mark them as outlined.
2956 OutlinedRegions.clear();
2957 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2959 DenseSet<BasicBlock *> BlocksInRegion;
2960 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2961 OS->CE = new (ExtractorAllocator.Allocate())
2962 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2963 false, nullptr, "outlined");
2964 bool FunctionOutlined = extractSection(*OS);
2965 if (FunctionOutlined) {
2966 unsigned StartIdx = OS->Candidate->getStartIdx();
2967 unsigned EndIdx = OS->Candidate->getEndIdx();
2968 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2969 Outlined.insert(Idx);
2970
2971 OutlinedRegions.push_back(OS);
2972 }
2973 }
2974
2975 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2976 << " with benefit " << CurrentGroup.Benefit
2977 << " and cost " << CurrentGroup.Cost << "\n");
2978
2979 CurrentGroup.Regions = std::move(OutlinedRegions);
2980
2981 if (CurrentGroup.Regions.empty())
2982 continue;
2983
2985 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2986 ORE.emit([&]() {
2987 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2988 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2989 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2990 << " regions with decrease of "
2991 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2992 << " instructions at locations ";
2993 interleave(
2994 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2995 [&R](OutlinableRegion *Region) {
2996 R << ore::NV("DebugLoc",
2997 Region->Candidate->frontInstruction()->getDebugLoc());
2998 },
2999 [&R]() { R << " "; });
3000 return R;
3001 });
3002
3003 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
3004 OutlinedFunctionNum);
3005 }
3006
3007 for (Function *F : FuncsToRemove)
3008 F->eraseFromParent();
3009
3010 return OutlinedFunctionNum;
3011}
3012
3014 CostModel = !NoCostModel;
3015 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
3016
3017 return doOutline(M) > 0;
3018}
3019
3021 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3022
3023 std::function<TargetTransformInfo &(Function &)> GTTI =
3024 [&FAM](Function &F) -> TargetTransformInfo & {
3026 };
3027
3028 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3029 [&AM](Module &M) -> IRSimilarityIdentifier & {
3030 return AM.getResult<IRSimilarityAnalysis>(M);
3031 };
3032
3033 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3034 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3035 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3036 ORE.reset(new OptimizationRemarkEmitter(&F));
3037 return *ORE;
3038 };
3039
3040 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3041 return PreservedAnalyses::none();
3042 return PreservedAnalyses::all();
3043}
ReachingDefAnalysis InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:832
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:156
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:706
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:863
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs, std::vector< Function * > &FuncsToRemove, const DenseMap< Value *, Value * > &OutputMappings)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:549
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:165
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
Definition: IROutliner.cpp:220
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:934
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:806
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:620
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
Definition: IROutliner.cpp:530
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:463
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
Definition: IROutliner.cpp:778
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
This header defines various interfaces for pass management in LLVM.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
AttributeSet getFnAttrs() const
The function attributes are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:662
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:279
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
size_t size() const
Definition: BasicBlock.h:469
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:258
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1826
This is an important base class in LLVM.
Definition: Constant.h:42
Debug location.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Class to represent function types.
Definition: DerivedTypes.h:105
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:641
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
const BasicBlock & back() const
Definition: Function.h:862
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
size_t size() const
Definition: Function.h:858
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:701
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:669
size_t arg_size() const
Definition: Function.h:901
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:519
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
bool isTerminator() const
Definition: Instruction.h:277
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:121
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
iterator begin()
Definition: RegionInfo.h:558
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
iterator end()
Definition: RegionInfo.h:559
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition: SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition: SetVector.h:84
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:229
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
User * user_back()
Definition: Value.h:407
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:542
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1096
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:193
bool erase(const ValueT &V)
Definition: DenseSet.h:97
An opaque object representing a hash code.
Definition: Hashing.h:75
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:202
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:165
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:136
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:2169
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:42
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:38
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:46
@ Other
Any other memory.
auto predecessors(const MachineBasicBlock *BB)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
Definition: DebugInfo.cpp:439
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:72
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:135
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:78
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:98
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:598
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:131
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
Definition: IROutliner.cpp:123
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
Definition: IROutliner.cpp:119
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:74
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
Definition: IROutliner.cpp:124
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:86
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:89
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:114
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:82
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:102
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:93
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:80
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:110
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:605
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:128
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:106
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:486
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:248
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:186
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:198
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:375