LLVM 23.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
64 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
82 /// The Function for the collective overall function.
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(TargetBB.end(), &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(*CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
195 return FoundValueOpt.value_or(nullptr);
196}
197
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis())
225 for (BasicBlock *Incoming : PN.blocks())
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 if (Included.contains(Incoming))
229 Incoming->getTerminator()->replaceSuccessorWith(Find, Replace);
230}
231
232
234 assert(!CandidateSplit && "Candidate already split!");
235
236 Instruction *BackInst = Candidate->backInstruction();
237
238 Instruction *EndInst = nullptr;
239 // Check whether the last instruction is a terminator, if it is, we do
240 // not split on the following instruction. We leave the block as it is. We
241 // also check that this is not the last instruction in the Module, otherwise
242 // the check for whether the current following instruction matches the
243 // previously recorded instruction will be incorrect.
244 if (!BackInst->isTerminator() ||
245 BackInst->getParent() != &BackInst->getFunction()->back()) {
246 EndInst = Candidate->end()->Inst;
247 assert(EndInst && "Expected an end instruction?");
248 }
249
250 // We check if the current instruction following the last instruction in the
251 // region is the same as the recorded instruction following the last
252 // instruction. If they do not match, there could be problems in rewriting
253 // the program after outlining, so we ignore it.
254 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
255 return;
256
257 Instruction *StartInst = (*Candidate->begin()).Inst;
258 assert(StartInst && "Expected a start instruction?");
259 StartBB = StartInst->getParent();
260 PrevBB = StartBB;
261
263 Candidate->getBasicBlocks(BBSet);
264
265 // We iterate over the instructions in the region, if we find a PHINode, we
266 // check if there are predecessors outside of the region, if there are,
267 // we ignore this region since we are unable to handle the severing of the
268 // phi node right now.
269
270 // TODO: Handle extraneous inputs for PHINodes through variable number of
271 // inputs, similar to how outputs are handled.
272 BasicBlock::iterator It = StartInst->getIterator();
273 EndBB = BackInst->getParent();
274 BasicBlock *IBlock;
275 BasicBlock *PHIPredBlock = nullptr;
276 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
277 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
278 unsigned NumPredsOutsideRegion = 0;
279 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
280 if (!BBSet.contains(PN->getIncomingBlock(i))) {
281 PHIPredBlock = PN->getIncomingBlock(i);
282 ++NumPredsOutsideRegion;
283 continue;
284 }
285
286 // We must consider the case there the incoming block to the PHINode is
287 // the same as the final block of the OutlinableRegion. If this is the
288 // case, the branch from this block must also be outlined to be valid.
289 IBlock = PN->getIncomingBlock(i);
290 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
291 PHIPredBlock = PN->getIncomingBlock(i);
292 ++NumPredsOutsideRegion;
293 }
294 }
295
296 if (NumPredsOutsideRegion > 1)
297 return;
298
299 It++;
300 }
301
302 // If the region starts with a PHINode, but is not the initial instruction of
303 // the BasicBlock, we ignore this region for now.
304 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
305 return;
306
307 // If the region ends with a PHINode, but does not contain all of the phi node
308 // instructions of the region, we ignore it for now.
309 if (isa<PHINode>(BackInst) &&
310 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
311 return;
312
313 // The basic block gets split like so:
314 // block: block:
315 // inst1 inst1
316 // inst2 inst2
317 // region1 br block_to_outline
318 // region2 block_to_outline:
319 // region3 -> region1
320 // region4 region2
321 // inst3 region3
322 // inst4 region4
323 // br block_after_outline
324 // block_after_outline:
325 // inst3
326 // inst4
327
328 std::string OriginalName = PrevBB->getName().str();
329
330 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
331 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
332 // If there was a PHINode with an incoming block outside the region,
333 // make sure is correctly updated in the newly split block.
334 if (PHIPredBlock)
335 PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);
336
337 CandidateSplit = true;
338 if (!BackInst->isTerminator()) {
339 EndBB = EndInst->getParent();
340 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
341 EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
342 FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
343 } else {
344 EndBB = BackInst->getParent();
345 EndsInBranch = true;
346 FollowBB = nullptr;
347 }
348
349 // Refind the basic block set.
350 BBSet.clear();
351 Candidate->getBasicBlocks(BBSet);
352 // For the phi nodes in the new starting basic block of the region, we
353 // reassign the targets of the basic blocks branching instructions.
355 if (FollowBB)
357}
358
360 assert(CandidateSplit && "Candidate is not split!");
361
362 // The basic block gets reattached like so:
363 // block: block:
364 // inst1 inst1
365 // inst2 inst2
366 // br block_to_outline region1
367 // block_to_outline: -> region2
368 // region1 region3
369 // region2 region4
370 // region3 inst3
371 // region4 inst4
372 // br block_after_outline
373 // block_after_outline:
374 // inst3
375 // inst4
376 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
377
378 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
379 // Make sure PHINode references to the block we are merging into are
380 // updated to be incoming blocks from the predecessor to the current block.
381
382 // NOTE: If this is updated such that the outlined block can have more than
383 // one incoming block to a PHINode, this logic will have to updated
384 // to handle multiple precessors instead.
385
386 // We only need to update this if the outlined section contains a PHINode, if
387 // it does not, then the incoming block was never changed in the first place.
388 // On the other hand, if PrevBB has no predecessors, it means that all
389 // incoming blocks to the first block are contained in the region, and there
390 // will be nothing to update.
391 Instruction *StartInst = (*Candidate->begin()).Inst;
392 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
393 assert(!PrevBB->hasNPredecessorsOrMore(2) &&
394 "PrevBB has more than one predecessor. Should be 0 or 1.");
395 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
396 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB);
397 }
398 PrevBB->getTerminator()->eraseFromParent();
399
400 // If we reattaching after outlining, we iterate over the phi nodes to
401 // the initial block, and reassign the branch instructions of the incoming
402 // blocks to the block we are remerging into.
403 if (!ExtractedFunction) {
405 Candidate->getBasicBlocks(BBSet);
406
408 if (!EndsInBranch)
410 }
411
413
414 BasicBlock *PlacementBB = PrevBB;
415 if (StartBB != EndBB)
416 PlacementBB = EndBB;
417 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
418 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
419 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
420 PlacementBB->getTerminator()->eraseFromParent();
421 moveBBContents(*FollowBB, *PlacementBB);
422 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
423 FollowBB->eraseFromParent();
424 }
425
426 PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
427 StartBB->eraseFromParent();
428
429 // Make sure to save changes back to the StartBB.
430 StartBB = PrevBB;
431 EndBB = nullptr;
432 PrevBB = nullptr;
433 FollowBB = nullptr;
434
435 CandidateSplit = false;
436}
437
438/// Find whether \p V matches the Constants previously found for the \p GVN.
439///
440/// \param V - The value to check for consistency.
441/// \param GVN - The global value number assigned to \p V.
442/// \param GVNToConstant - The mapping of global value number to Constants.
443/// \returns true if the Value matches the Constant mapped to by V and false if
444/// it \p V is a Constant but does not match.
445/// \returns std::nullopt if \p V is not a Constant.
446static std::optional<bool>
447constantMatches(Value *V, unsigned GVN,
448 DenseMap<unsigned, Constant *> &GVNToConstant) {
449 // See if we have a constants
451 if (!CST)
452 return std::nullopt;
453
454 // Holds a mapping from a global value number to a Constant.
456 bool Inserted;
457
458
459 // If we have a constant, try to make a new entry in the GVNToConstant.
460 std::tie(GVNToConstantIt, Inserted) =
461 GVNToConstant.insert(std::make_pair(GVN, CST));
462 // If it was found and is not equal, it is not the same. We do not
463 // handle this case yet, and exit early.
464 if (Inserted || (GVNToConstantIt->second == CST))
465 return true;
466
467 return false;
468}
469
471 InstructionCost Benefit = 0;
472
473 // Estimate the benefit of outlining a specific sections of the program. We
474 // delegate mostly this task to the TargetTransformInfo so that if the target
475 // has specific changes, we can have a more accurate estimate.
476
477 // However, getInstructionCost delegates the code size calculation for
478 // arithmetic instructions to getArithmeticInstrCost in
479 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
480 // code size for a division and remainder instruction to be equal to 4, and
481 // everything else to 1. This is not an accurate representation of the
482 // division instruction for targets that have a native division instruction.
483 // To be overly conservative, we only add 1 to the number of instructions for
484 // each division instruction.
485 for (IRInstructionData &ID : *Candidate) {
486 Instruction *I = ID.Inst;
487 switch (I->getOpcode()) {
488 case Instruction::FDiv:
489 case Instruction::FRem:
490 case Instruction::SDiv:
491 case Instruction::SRem:
492 case Instruction::UDiv:
493 case Instruction::URem:
494 Benefit += 1;
495 break;
496 default:
497 Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
498 break;
499 }
500 }
501
502 return Benefit;
503}
504
505/// Check the \p OutputMappings structure for value \p Input, if it exists
506/// it has been used as an output for outlining, and has been renamed, and we
507/// return the new value, otherwise, we return the same value.
508///
509/// \param OutputMappings [in] - The mapping of values to their renamed value
510/// after being used as an output for an outlined region.
511/// \param Input [in] - The value to find the remapped value of, if it exists.
512/// \return The remapped value if it has been renamed, and the same value if has
513/// not.
515 Value *Input) {
517 OutputMappings.find(Input);
518 if (OutputMapping != OutputMappings.end())
519 return OutputMapping->second;
520 return Input;
521}
522
523/// Find whether \p Region matches the global value numbering to Constant
524/// mapping found so far.
525///
526/// \param Region - The OutlinableRegion we are checking for constants
527/// \param GVNToConstant - The mapping of global value number to Constants.
528/// \param NotSame - The set of global value numbers that do not have the same
529/// constant in each region.
530/// \returns true if all Constants are the same in every use of a Constant in \p
531/// Region and false if not
532static bool
534 DenseMap<unsigned, Constant *> &GVNToConstant,
535 DenseSet<unsigned> &NotSame) {
536 bool ConstantsTheSame = true;
537
538 IRSimilarityCandidate &C = *Region.Candidate;
539 for (IRInstructionData &ID : C) {
540
541 // Iterate over the operands in an instruction. If the global value number,
542 // assigned by the IRSimilarityCandidate, has been seen before, we check if
543 // the number has been found to be not the same value in each instance.
544 for (Value *V : ID.OperVals) {
545 std::optional<unsigned> GVNOpt = C.getGVN(V);
546 assert(GVNOpt && "Expected a GVN for operand?");
547 unsigned GVN = *GVNOpt;
548
549 // Check if this global value has been found to not be the same already.
550 if (NotSame.contains(GVN)) {
551 if (isa<Constant>(V))
552 ConstantsTheSame = false;
553 continue;
554 }
555
556 // If it has been the same so far, we check the value for if the
557 // associated Constant value match the previous instances of the same
558 // global value number. If the global value does not map to a Constant,
559 // it is considered to not be the same value.
560 std::optional<bool> ConstantMatches =
561 constantMatches(V, GVN, GVNToConstant);
562 if (ConstantMatches) {
563 if (*ConstantMatches)
564 continue;
565 else
566 ConstantsTheSame = false;
567 }
568
569 // While this value is a register, it might not have been previously,
570 // make sure we don't already have a constant mapped to this global value
571 // number.
572 if (GVNToConstant.contains(GVN))
573 ConstantsTheSame = false;
574
575 NotSame.insert(GVN);
576 }
577 }
578
579 return ConstantsTheSame;
580}
581
583 DenseMap<unsigned, Constant *> GVNToConstant;
584
586 collectRegionsConstants(*Region, GVNToConstant, NotSame);
587}
588
590 for (OutlinableRegion *OS : Regions)
591 OutputGVNCombinations.insert(OS->GVNStores);
592
593 // We are adding an extracted argument to decide between which output path
594 // to use in the basic block. It is used in a switch statement and only
595 // needs to be an integer.
596 if (OutputGVNCombinations.size() > 1)
597 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
598}
599
600/// Get the subprogram if it exists for one of the outlined regions.
601///
602/// \param [in] Group - The set of regions to find a subprogram for.
603/// \returns the subprogram if it exists, or nullptr.
605 for (OutlinableRegion *OS : Group.Regions)
606 if (Function *F = OS->Call->getFunction())
607 if (DISubprogram *SP = F->getSubprogram())
608 return SP;
609
610 return nullptr;
611}
612
613Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
614 unsigned FunctionNameSuffix) {
615 assert(!Group.OutlinedFunction && "Function is already defined!");
616
617 Type *RetTy = Type::getVoidTy(M.getContext());
618 // All extracted functions _should_ have the same return type at this point
619 // since the similarity identifier ensures that all branches outside of the
620 // region occur in the same place.
621
622 // NOTE: Should we ever move to the model that uses a switch at every point
623 // needed, meaning that we could branch within the region or out, it is
624 // possible that we will need to switch to using the most general case all of
625 // the time.
626 for (OutlinableRegion *R : Group.Regions) {
627 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
628 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
629 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
630 RetTy = ExtractedFuncType;
631 }
632
634 RetTy, Group.ArgumentTypes, false);
635
636 // These functions will only be called from within the same module, so
637 // we can set an internal linkage.
640 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
641
642 // Transfer the swifterr attribute to the correct function parameter.
643 if (Group.SwiftErrorArgument)
645 Attribute::SwiftError);
646
647 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
648 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
649
650 // If there's a DISubprogram associated with this outlined function, then
651 // emit debug info for the outlined function.
652 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
653 Function *F = Group.OutlinedFunction;
654 // We have a DISubprogram. Get its DICompileUnit.
655 DICompileUnit *CU = SP->getUnit();
656 DIBuilder DB(M, true, CU);
657 DIFile *Unit = SP->getFile();
658 Mangler Mg;
659 // Get the mangled name of the function for the linkage name.
660 std::string Dummy;
661 llvm::raw_string_ostream MangledNameStream(Dummy);
662 Mg.getNameWithPrefix(MangledNameStream, F, false);
663
664 DISubprogram *OutlinedSP = DB.createFunction(
665 Unit /* Context */, F->getName(), Dummy, Unit /* File */,
666 0 /* Line 0 is reserved for compiler-generated code. */,
667 DB.createSubroutineType(DB.getOrCreateTypeArray({})), /* void type */
668 0, /* Line 0 is reserved for compiler-generated code. */
669 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
670 /* Outlined code is optimized code by definition. */
671 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
672
673 // Attach subprogram to the function.
674 F->setSubprogram(OutlinedSP);
675 // We're done with the DIBuilder.
676 DB.finalize();
677 }
678
679 return Group.OutlinedFunction;
680}
681
682/// Move each BasicBlock in \p Old to \p New.
683///
684/// \param [in] Old - The function to move the basic blocks from.
685/// \param [in] New - The function to move the basic blocks to.
686/// \param [out] NewEnds - The return blocks of the new overall function.
687static void moveFunctionData(Function &Old, Function &New,
689 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
690 CurrBB.removeFromParent();
691 CurrBB.insertInto(&New);
692 Instruction *I = CurrBB.getTerminator();
693
694 // For each block we find a return instruction is, it is a potential exit
695 // path for the function. We keep track of each block based on the return
696 // value here.
698 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
699
700 for (Instruction &Val : CurrBB) {
701 // Since debug-info originates from many different locations in the
702 // program, it will cause incorrect reporting from a debugger if we keep
703 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
704 // here, collect intrinsics for removal later.
705 Val.dropDbgRecords();
706
707 // We must handle the scoping of called functions differently than
708 // other outlined instructions.
709 if (!isa<CallInst>(&Val)) {
710 // Remove the debug information for outlined functions.
711 Val.setDebugLoc(DebugLoc::getDropped());
712
713 // Loop info metadata may contain line locations. Update them to have no
714 // value in the new subprogram since the outlined code could be from
715 // several locations.
716 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
717 if (DISubprogram *SP = New.getSubprogram())
718 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
719 return DILocation::get(New.getContext(), Loc->getLine(),
720 Loc->getColumn(), SP, nullptr);
721 return MD;
722 };
723 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
724 continue;
725 }
726
727 // Edit the scope of called functions inside of outlined functions.
728 if (DISubprogram *SP = New.getSubprogram()) {
729 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
730 Val.setDebugLoc(DI);
731 }
732 }
733 }
734}
735
736/// Find the constants that will need to be lifted into arguments
737/// as they are not the same in each instance of the region.
738///
739/// \param [in] C - The IRSimilarityCandidate containing the region we are
740/// analyzing.
741/// \param [in] NotSame - The set of global value numbers that do not have a
742/// single Constant across all OutlinableRegions similar to \p C.
743/// \param [out] Inputs - The list containing the global value numbers of the
744/// arguments needed for the region of code.
746 std::vector<unsigned> &Inputs) {
748 // Iterate over the instructions, and find what constants will need to be
749 // extracted into arguments.
750 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
751 IDIt != EndIDIt; IDIt++) {
752 for (Value *V : (*IDIt).OperVals) {
753 // Since these are stored before any outlining, they will be in the
754 // global value numbering.
755 unsigned GVN = *C.getGVN(V);
756 if (isa<Constant>(V))
757 if (NotSame.contains(GVN) && Seen.insert(GVN).second)
758 Inputs.push_back(GVN);
759 }
760 }
761}
762
763/// Find the GVN for the inputs that have been found by the CodeExtractor.
764///
765/// \param [in] C - The IRSimilarityCandidate containing the region we are
766/// analyzing.
767/// \param [in] CurrentInputs - The set of inputs found by the
768/// CodeExtractor.
769/// \param [in] OutputMappings - The mapping of values that have been replaced
770/// by a new output value.
771/// \param [out] EndInputNumbers - The global value numbers for the extracted
772/// arguments.
774 SetVector<Value *> &CurrentInputs,
775 const DenseMap<Value *, Value *> &OutputMappings,
776 std::vector<unsigned> &EndInputNumbers) {
777 // Get the Global Value Number for each input. We check if the Value has been
778 // replaced by a different value at output, and use the original value before
779 // replacement.
780 for (Value *Input : CurrentInputs) {
781 assert(Input && "Have a nullptr as an input");
782 auto It = OutputMappings.find(Input);
783 if (It != OutputMappings.end())
784 Input = It->second;
785 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
786 EndInputNumbers.push_back(*C.getGVN(Input));
787 }
788}
789
790/// Find the original value for the \p ArgInput values if any one of them was
791/// replaced during a previous extraction.
792///
793/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
794/// \param [in] OutputMappings - The mapping of values that have been replaced
795/// by a new output value.
796/// \param [out] RemappedArgInputs - The remapped values according to
797/// \p OutputMappings that will be extracted.
798static void
800 const DenseMap<Value *, Value *> &OutputMappings,
801 SetVector<Value *> &RemappedArgInputs) {
802 // Get the global value number for each input that will be extracted as an
803 // argument by the code extractor, remapping if needed for reloaded values.
804 for (Value *Input : ArgInputs) {
805 auto It = OutputMappings.find(Input);
806 if (It != OutputMappings.end())
807 Input = It->second;
808 RemappedArgInputs.insert(Input);
809 }
810}
811
812/// Find the input GVNs and the output values for a region of Instructions.
813/// Using the code extractor, we collect the inputs to the extracted function.
814///
815/// The \p Region can be identified as needing to be ignored in this function.
816/// It should be checked whether it should be ignored after a call to this
817/// function.
818///
819/// \param [in,out] Region - The region of code to be analyzed.
820/// \param [out] InputGVNs - The global value numbers for the extracted
821/// arguments.
822/// \param [in] NotSame - The global value numbers in the region that do not
823/// have the same constant value in the regions structurally similar to
824/// \p Region.
825/// \param [in] OutputMappings - The mapping of values that have been replaced
826/// by a new output value after extraction.
827/// \param [out] ArgInputs - The values of the inputs to the extracted function.
828/// \param [out] Outputs - The set of values extracted by the CodeExtractor
829/// as outputs.
831 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
832 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
833 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
834 IRSimilarityCandidate &C = *Region.Candidate;
835
836 // OverallInputs are the inputs to the region found by the CodeExtractor,
837 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
838 // allocas of values whose lifetimes are contained completely within the
839 // outlined region. PremappedInputs are the arguments found by the
840 // CodeExtractor, removing conditions such as sunken allocas, but that
841 // may need to be remapped due to the extracted output values replacing
842 // the original values. We use DummyOutputs for this first run of finding
843 // inputs and outputs since the outputs could change during findAllocas,
844 // the correct set of extracted outputs will be in the final Outputs ValueSet.
845 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
846 DummyOutputs;
847
848 // Use the code extractor to get the inputs and outputs, without sunken
849 // allocas or removing llvm.assumes.
850 CodeExtractor *CE = Region.CE;
851 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
852 assert(Region.StartBB && "Region must have a start BasicBlock!");
853 Function *OrigF = Region.StartBB->getParent();
854 CodeExtractorAnalysisCache CEAC(*OrigF);
855 BasicBlock *Dummy = nullptr;
856
857 // The region may be ineligible due to VarArgs in the parent function. In this
858 // case we ignore the region.
859 if (!CE->isEligible()) {
860 Region.IgnoreRegion = true;
861 return;
862 }
863
864 // Find if any values are going to be sunk into the function when extracted
865 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
866 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
867
868 // TODO: Support regions with sunken allocas: values whose lifetimes are
869 // contained completely within the outlined region. These are not guaranteed
870 // to be the same in every region, so we must elevate them all to arguments
871 // when they appear. If these values are not equal, it means there is some
872 // Input in OverallInputs that was removed for ArgInputs.
873 if (OverallInputs.size() != PremappedInputs.size()) {
874 Region.IgnoreRegion = true;
875 return;
876 }
877
878 findConstants(C, NotSame, InputGVNs);
879
880 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
881
882 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
883 ArgInputs);
884
885 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
886 // we need to make sure they are in a deterministic order.
887 stable_sort(InputGVNs);
888}
889
890/// Look over the inputs and map each input argument to an argument in the
891/// overall function for the OutlinableRegions. This creates a way to replace
892/// the arguments of the extracted function with the arguments of the new
893/// overall function.
894///
895/// \param [in,out] Region - The region of code to be analyzed.
896/// \param [in] InputGVNs - The global value numbering of the input values
897/// collected.
898/// \param [in] ArgInputs - The values of the arguments to the extracted
899/// function.
900static void
902 std::vector<unsigned> &InputGVNs,
903 SetVector<Value *> &ArgInputs) {
904
905 IRSimilarityCandidate &C = *Region.Candidate;
906 OutlinableGroup &Group = *Region.Parent;
907
908 // This counts the argument number in the overall function.
909 unsigned TypeIndex = 0;
910
911 // This counts the argument number in the extracted function.
912 unsigned OriginalIndex = 0;
913
914 // Find the mapping of the extracted arguments to the arguments for the
915 // overall function. Since there may be extra arguments in the overall
916 // function to account for the extracted constants, we have two different
917 // counters as we find extracted arguments, and as we come across overall
918 // arguments.
919
920 // Additionally, in our first pass, for the first extracted function,
921 // we find argument locations for the canonical value numbering. This
922 // numbering overrides any discovered location for the extracted code.
923 for (unsigned InputVal : InputGVNs) {
924 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
925 assert(CanonicalNumberOpt && "Canonical number not found?");
926 unsigned CanonicalNumber = *CanonicalNumberOpt;
927
928 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
929 assert(InputOpt && "Global value number not found?");
930 Value *Input = *InputOpt;
931
933 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
934
935 if (!Group.InputTypesSet) {
936 Group.ArgumentTypes.push_back(Input->getType());
937 // If the input value has a swifterr attribute, make sure to mark the
938 // argument in the overall function.
939 if (Input->isSwiftError()) {
940 assert(
941 !Group.SwiftErrorArgument &&
942 "Argument already marked with swifterr for this OutlinableGroup!");
943 Group.SwiftErrorArgument = TypeIndex;
944 }
945 }
946
947 // Check if we have a constant. If we do add it to the overall argument
948 // number to Constant map for the region, and continue to the next input.
949 if (Constant *CST = dyn_cast<Constant>(Input)) {
950 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
951 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
952 else {
954 std::make_pair(CanonicalNumber, TypeIndex));
955 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
956 }
957 TypeIndex++;
958 continue;
959 }
960
961 // It is not a constant, we create the mapping from extracted argument list
962 // to the overall argument list, using the canonical location, if it exists.
963 assert(ArgInputs.count(Input) && "Input cannot be found!");
964
965 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
966 if (OriginalIndex != AggArgIt->second)
967 Region.ChangedArgOrder = true;
968 Region.ExtractedArgToAgg.insert(
969 std::make_pair(OriginalIndex, AggArgIt->second));
970 Region.AggArgToExtracted.insert(
971 std::make_pair(AggArgIt->second, OriginalIndex));
972 } else {
974 std::make_pair(CanonicalNumber, TypeIndex));
975 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
976 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
977 }
978 OriginalIndex++;
979 TypeIndex++;
980 }
981
982 // If the function type definitions for the OutlinableGroup holding the region
983 // have not been set, set the length of the inputs here. We should have the
984 // same inputs for all of the different regions contained in the
985 // OutlinableGroup since they are all structurally similar to one another.
986 if (!Group.InputTypesSet) {
987 Group.NumAggregateInputs = TypeIndex;
988 Group.InputTypesSet = true;
989 }
990
991 Region.NumExtractedInputs = OriginalIndex;
992}
993
994/// Check if the \p V has any uses outside of the region other than \p PN.
995///
996/// \param V [in] - The value to check.
997/// \param PHILoc [in] - The location in the PHINode of \p V.
998/// \param PN [in] - The PHINode using \p V.
999/// \param Exits [in] - The potential blocks we exit to from the outlined
1000/// region.
1001/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1002/// \returns true if \p V has any use soutside its region other than \p PN.
1003static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1005 DenseSet<BasicBlock *> &BlocksInRegion) {
1006 // We check to see if the value is used by the PHINode from some other
1007 // predecessor not included in the region. If it is, we make sure
1008 // to keep it as an output.
1010 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1011 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1012 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1013 }))
1014 return true;
1015
1016 // Check if the value is used by any other instructions outside the region.
1017 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1018 Instruction *I = dyn_cast<Instruction>(U);
1019 if (!I)
1020 return false;
1021
1022 // If the use of the item is inside the region, we skip it. Uses
1023 // inside the region give us useful information about how the item could be
1024 // used as an output.
1025 BasicBlock *Parent = I->getParent();
1026 if (BlocksInRegion.contains(Parent))
1027 return false;
1028
1029 // If it's not a PHINode then we definitely know the use matters. This
1030 // output value will not completely combined with another item in a PHINode
1031 // as it is directly reference by another non-phi instruction
1032 if (!isa<PHINode>(I))
1033 return true;
1034
1035 // If we have a PHINode outside one of the exit locations, then it
1036 // can be considered an outside use as well. If there is a PHINode
1037 // contained in the Exit where this values use matters, it will be
1038 // caught when we analyze that PHINode.
1039 if (!Exits.contains(Parent))
1040 return true;
1041
1042 return false;
1043 });
1044}
1045
1046/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1047/// considered outputs. A PHINodes is an output when more than one incoming
1048/// value has been marked by the CodeExtractor as an output.
1049///
1050/// \param CurrentExitFromRegion [in] - The block to analyze.
1051/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1052/// region.
1053/// \param RegionBlocks [in] - The basic blocks in the region.
1054/// \param Outputs [in, out] - The existing outputs for the region, we may add
1055/// PHINodes to this as we find that they replace output values.
1056/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1057/// totally replaced by a PHINode.
1058/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1059/// in PHINodes, but have other uses, and should still be considered outputs.
1061 BasicBlock *CurrentExitFromRegion,
1062 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1063 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1064 DenseSet<Value *> &OutputsReplacedByPHINode,
1065 DenseSet<Value *> &OutputsWithNonPhiUses) {
1066 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1067 // Find all incoming values from the outlining region.
1068 SmallVector<unsigned, 2> IncomingVals;
1069 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1070 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1071 IncomingVals.push_back(I);
1072
1073 // Do not process PHI if there are no predecessors from region.
1074 unsigned NumIncomingVals = IncomingVals.size();
1075 if (NumIncomingVals == 0)
1076 continue;
1077
1078 // If there is one predecessor, we mark it as a value that needs to be kept
1079 // as an output.
1080 if (NumIncomingVals == 1) {
1081 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1082 OutputsWithNonPhiUses.insert(V);
1083 OutputsReplacedByPHINode.erase(V);
1084 continue;
1085 }
1086
1087 // This PHINode will be used as an output value, so we add it to our list.
1088 Outputs.insert(&PN);
1089
1090 // Not all of the incoming values should be ignored as other inputs and
1091 // outputs may have uses in outlined region. If they have other uses
1092 // outside of the single PHINode we should not skip over it.
1093 for (unsigned Idx : IncomingVals) {
1094 Value *V = PN.getIncomingValue(Idx);
1095 if (!isa<Constant>(V) &&
1096 outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1097 OutputsWithNonPhiUses.insert(V);
1098 OutputsReplacedByPHINode.erase(V);
1099 continue;
1100 }
1101 if (!OutputsWithNonPhiUses.contains(V))
1102 OutputsReplacedByPHINode.insert(V);
1103 }
1104 }
1105}
1106
1107// Represents the type for the unsigned number denoting the output number for
1108// phi node, along with the canonical number for the exit block.
1109using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1110// The list of canonical numbers for the incoming values to a PHINode.
1112// The pair type representing the set of canonical values being combined in the
1113// PHINode, along with the location data for the PHINode.
1114using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1115
1116/// Encode \p PND as an integer for easy lookup based on the argument location,
1117/// the parent BasicBlock canonical numbering, and the canonical numbering of
1118/// the values stored in the PHINode.
1119///
1120/// \param PND - The data to hash.
1121/// \returns The hash code of \p PND.
1123 return llvm::hash_combine(llvm::hash_value(PND.first.first),
1124 llvm::hash_value(PND.first.second),
1125 llvm::hash_combine_range(PND.second));
1126}
1127
1128/// Create a special GVN for PHINodes that will be used outside of
1129/// the region. We create a hash code based on the Canonical number of the
1130/// parent BasicBlock, the canonical numbering of the values stored in the
1131/// PHINode and the aggregate argument location. This is used to find whether
1132/// this PHINode type has been given a canonical numbering already. If not, we
1133/// assign it a value and store it for later use. The value is returned to
1134/// identify different output schemes for the set of regions.
1135///
1136/// \param Region - The region that \p PN is an output for.
1137/// \param PN - The PHINode we are analyzing.
1138/// \param Blocks - The blocks for the region we are analyzing.
1139/// \param AggArgIdx - The argument \p PN will be stored into.
1140/// \returns An optional holding the assigned canonical number, or std::nullopt
1141/// if there is some attribute of the PHINode blocking it from being used.
1142static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1143 PHINode *PN,
1144 DenseSet<BasicBlock *> &Blocks,
1145 unsigned AggArgIdx) {
1146 OutlinableGroup &Group = *Region.Parent;
1147 IRSimilarityCandidate &Cand = *Region.Candidate;
1148 BasicBlock *PHIBB = PN->getParent();
1149 CanonList PHIGVNs;
1150 Value *Incoming;
1151 BasicBlock *IncomingBlock;
1152 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1153 Incoming = PN->getIncomingValue(Idx);
1154 IncomingBlock = PN->getIncomingBlock(Idx);
1155 // If the incoming block isn't in the region, we don't have to worry about
1156 // this incoming value.
1157 if (!Blocks.contains(IncomingBlock))
1158 continue;
1159
1160 // If we cannot find a GVN, and the incoming block is included in the region
1161 // this means that the input to the PHINode is not included in the region we
1162 // are trying to analyze, meaning, that if it was outlined, we would be
1163 // adding an extra input. We ignore this case for now, and so ignore the
1164 // region.
1165 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1166 if (!OGVN) {
1167 Region.IgnoreRegion = true;
1168 return std::nullopt;
1169 }
1170
1171 // Collect the canonical numbers of the values in the PHINode.
1172 unsigned GVN = *OGVN;
1173 OGVN = Cand.getCanonicalNum(GVN);
1174 assert(OGVN && "No GVN found for incoming value?");
1175 PHIGVNs.push_back(*OGVN);
1176
1177 // Find the incoming block and use the canonical numbering as well to define
1178 // the hash for the PHINode.
1179 OGVN = Cand.getGVN(IncomingBlock);
1180
1181 // If there is no number for the incoming block, it is because we have
1182 // split the candidate basic blocks. So we use the previous block that it
1183 // was split from to find the valid global value numbering for the PHINode.
1184 if (!OGVN) {
1185 assert(Cand.getStartBB() == IncomingBlock &&
1186 "Unknown basic block used in exit path PHINode.");
1187
1188 BasicBlock *PrevBlock = nullptr;
1189 // Iterate over the predecessors to the incoming block of the
1190 // PHINode, when we find a block that is not contained in the region
1191 // we know that this is the first block that we split from, and should
1192 // have a valid global value numbering.
1193 for (BasicBlock *Pred : predecessors(IncomingBlock))
1194 if (!Blocks.contains(Pred)) {
1195 PrevBlock = Pred;
1196 break;
1197 }
1198 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1199 OGVN = Cand.getGVN(PrevBlock);
1200 }
1201 GVN = *OGVN;
1202 OGVN = Cand.getCanonicalNum(GVN);
1203 assert(OGVN && "No GVN found for incoming block?");
1204 PHIGVNs.push_back(*OGVN);
1205 }
1206
1207 // Now that we have the GVNs for the incoming values, we are going to combine
1208 // them with the GVN of the incoming bock, and the output location of the
1209 // PHINode to generate a hash value representing this instance of the PHINode.
1212 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1213 assert(BBGVN && "Could not find GVN for the incoming block!");
1214
1215 BBGVN = Cand.getCanonicalNum(*BBGVN);
1216 assert(BBGVN && "Could not find canonical number for the incoming block!");
1217 // Create a pair of the exit block canonical value, and the aggregate
1218 // argument location, connected to the canonical numbers stored in the
1219 // PHINode.
1220 PHINodeData TemporaryPair =
1221 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1222 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1223
1224 // Look for and create a new entry in our connection between canonical
1225 // numbers for PHINodes, and the set of objects we just created.
1226 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1227 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1228 bool Inserted = false;
1229 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1230 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1231 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1232 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1233 }
1234
1235 return GVNToPHIIt->second;
1236}
1237
1238/// Create a mapping of the output arguments for the \p Region to the output
1239/// arguments of the overall outlined function.
1240///
1241/// \param [in,out] Region - The region of code to be analyzed.
1242/// \param [in] Outputs - The values found by the code extractor.
1243static void
1245 SetVector<Value *> &Outputs) {
1246 OutlinableGroup &Group = *Region.Parent;
1247 IRSimilarityCandidate &C = *Region.Candidate;
1248
1250 DenseSet<BasicBlock *> BlocksInRegion;
1251 C.getBasicBlocks(BlocksInRegion, BE);
1252
1253 // Find the exits to the region.
1255 for (BasicBlock *Block : BE)
1256 for (BasicBlock *Succ : successors(Block))
1257 if (!BlocksInRegion.contains(Succ))
1258 Exits.insert(Succ);
1259
1260 // After determining which blocks exit to PHINodes, we add these PHINodes to
1261 // the set of outputs to be processed. We also check the incoming values of
1262 // the PHINodes for whether they should no longer be considered outputs.
1263 DenseSet<Value *> OutputsReplacedByPHINode;
1264 DenseSet<Value *> OutputsWithNonPhiUses;
1265 for (BasicBlock *ExitBB : Exits)
1266 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1267 OutputsReplacedByPHINode,
1268 OutputsWithNonPhiUses);
1269
1270 // This counts the argument number in the extracted function.
1271 unsigned OriginalIndex = Region.NumExtractedInputs;
1272
1273 // This counts the argument number in the overall function.
1274 unsigned TypeIndex = Group.NumAggregateInputs;
1275 bool TypeFound;
1276 DenseSet<unsigned> AggArgsUsed;
1277
1278 // Iterate over the output types and identify if there is an aggregate pointer
1279 // type whose base type matches the current output type. If there is, we mark
1280 // that we will use this output register for this value. If not we add another
1281 // type to the overall argument type list. We also store the GVNs used for
1282 // stores to identify which values will need to be moved into an special
1283 // block that holds the stores to the output registers.
1284 for (Value *Output : Outputs) {
1285 TypeFound = false;
1286 // We can do this since it is a result value, and will have a number
1287 // that is necessarily the same. BUT if in the future, the instructions
1288 // do not have to be in same order, but are functionally the same, we will
1289 // have to use a different scheme, as one-to-one correspondence is not
1290 // guaranteed.
1291 unsigned ArgumentSize = Group.ArgumentTypes.size();
1292
1293 // If the output is combined in a PHINode, we make sure to skip over it.
1294 if (OutputsReplacedByPHINode.contains(Output))
1295 continue;
1296
1297 unsigned AggArgIdx = 0;
1298 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1299 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1300 continue;
1301
1302 if (!AggArgsUsed.insert(Jdx).second)
1303 continue;
1304
1305 TypeFound = true;
1306 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1307 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1308 AggArgIdx = Jdx;
1309 break;
1310 }
1311
1312 // We were unable to find an unused type in the output type set that matches
1313 // the output, so we add a pointer type to the argument types of the overall
1314 // function to handle this output and create a mapping to it.
1315 if (!TypeFound) {
1316 Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
1317 M.getDataLayout().getAllocaAddrSpace()));
1318 // Mark the new pointer type as the last value in the aggregate argument
1319 // list.
1320 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1321 AggArgsUsed.insert(ArgTypeIdx);
1322 Region.ExtractedArgToAgg.insert(
1323 std::make_pair(OriginalIndex, ArgTypeIdx));
1324 Region.AggArgToExtracted.insert(
1325 std::make_pair(ArgTypeIdx, OriginalIndex));
1326 AggArgIdx = ArgTypeIdx;
1327 }
1328
1329 // TODO: Adapt to the extra input from the PHINode.
1330 PHINode *PN = dyn_cast<PHINode>(Output);
1331
1332 std::optional<unsigned> GVN;
1333 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1334 // Values outside the region can be combined into PHINode when we
1335 // have multiple exits. We collect both of these into a list to identify
1336 // which values are being used in the PHINode. Each list identifies a
1337 // different PHINode, and a different output. We store the PHINode as it's
1338 // own canonical value. These canonical values are also dependent on the
1339 // output argument it is saved to.
1340
1341 // If two PHINodes have the same canonical values, but different aggregate
1342 // argument locations, then they will have distinct Canonical Values.
1343 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1344 if (!GVN)
1345 return;
1346 } else {
1347 // If we do not have a PHINode we use the global value numbering for the
1348 // output value, to find the canonical number to add to the set of stored
1349 // values.
1350 GVN = C.getGVN(Output);
1351 GVN = C.getCanonicalNum(*GVN);
1352 }
1353
1354 // Each region has a potentially unique set of outputs. We save which
1355 // values are output in a list of canonical values so we can differentiate
1356 // among the different store schemes.
1357 Region.GVNStores.push_back(*GVN);
1358
1359 OriginalIndex++;
1360 TypeIndex++;
1361 }
1362
1363 // We sort the stored values to make sure that we are not affected by analysis
1364 // order when determining what combination of items were stored.
1365 stable_sort(Region.GVNStores);
1366}
1367
1368void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1369 DenseSet<unsigned> &NotSame) {
1370 std::vector<unsigned> Inputs;
1371 SetVector<Value *> ArgInputs, Outputs;
1372
1373 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1374 Outputs);
1375
1376 if (Region.IgnoreRegion)
1377 return;
1378
1379 // Map the inputs found by the CodeExtractor to the arguments found for
1380 // the overall function.
1381 findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
1382
1383 // Map the outputs found by the CodeExtractor to the arguments found for
1384 // the overall function.
1386}
1387
1388/// Replace the extracted function in the Region with a call to the overall
1389/// function constructed from the deduplicated similar regions, replacing and
1390/// remapping the values passed to the extracted function as arguments to the
1391/// new arguments of the overall function.
1392///
1393/// \param [in] M - The module to outline from.
1394/// \param [in] Region - The regions of extracted code to be replaced with a new
1395/// function.
1396/// \returns a call instruction with the replaced function.
1398 std::vector<Value *> NewCallArgs;
1400
1401 OutlinableGroup &Group = *Region.Parent;
1402 CallInst *Call = Region.Call;
1403 assert(Call && "Call to replace is nullptr?");
1404 Function *AggFunc = Group.OutlinedFunction;
1405 assert(AggFunc && "Function to replace with is nullptr?");
1406
1407 // If the arguments are the same size, there are not values that need to be
1408 // made into an argument, the argument ordering has not been change, or
1409 // different output registers to handle. We can simply replace the called
1410 // function in this case.
1411 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1412 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1413 << *AggFunc << " with same number of arguments\n");
1414 Call->setCalledFunction(AggFunc);
1415 return Call;
1416 }
1417
1418 // We have a different number of arguments than the new function, so
1419 // we need to use our previously mappings off extracted argument to overall
1420 // function argument, and constants to overall function argument to create the
1421 // new argument list.
1422 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1423
1424 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1425 Group.OutputGVNCombinations.size() > 1) {
1426 // If we are on the last argument, and we need to differentiate between
1427 // output blocks, add an integer to the argument list to determine
1428 // what block to take
1429 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1430 << Region.OutputBlockNum << "\n");
1431 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1432 Region.OutputBlockNum));
1433 continue;
1434 }
1435
1436 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1437 if (ArgPair != Region.AggArgToExtracted.end()) {
1438 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1439 // If we found the mapping from the extracted function to the overall
1440 // function, we simply add it to the argument list. We use the same
1441 // value, it just needs to honor the new order of arguments.
1442 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1443 << *ArgumentValue << "\n");
1444 NewCallArgs.push_back(ArgumentValue);
1445 continue;
1446 }
1447
1448 // If it is a constant, we simply add it to the argument list as a value.
1449 if (auto It = Region.AggArgToConstant.find(AggArgIdx);
1450 It != Region.AggArgToConstant.end()) {
1451 Constant *CST = It->second;
1452 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1453 << *CST << "\n");
1454 NewCallArgs.push_back(CST);
1455 continue;
1456 }
1457
1458 // Add a nullptr value if the argument is not found in the extracted
1459 // function. If we cannot find a value, it means it is not in use
1460 // for the region, so we should not pass anything to it.
1461 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1462 NewCallArgs.push_back(ConstantPointerNull::get(
1463 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1464 }
1465
1466 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1467 << *AggFunc << " with new set of arguments\n");
1468 // Create the new call instruction and erase the old one.
1469 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1470 Call->getIterator());
1471
1472 // It is possible that the call to the outlined function is either the first
1473 // instruction is in the new block, the last instruction, or both. If either
1474 // of these is the case, we need to make sure that we replace the instruction
1475 // in the IRInstructionData struct with the new call.
1476 CallInst *OldCall = Region.Call;
1477 if (Region.NewFront->Inst == OldCall)
1478 Region.NewFront->Inst = Call;
1479 if (Region.NewBack->Inst == OldCall)
1480 Region.NewBack->Inst = Call;
1481
1482 // Transfer any debug information.
1483 Call->setDebugLoc(Region.Call->getDebugLoc());
1484 // Since our output may determine which branch we go to, we make sure to
1485 // propagate this new call value through the module.
1486 OldCall->replaceAllUsesWith(Call);
1487
1488 // Remove the old instruction.
1489 OldCall->eraseFromParent();
1490 Region.Call = Call;
1491
1492 // Make sure that the argument in the new function has the SwiftError
1493 // argument.
1494 if (Group.SwiftErrorArgument)
1495 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1496
1497 return Call;
1498}
1499
1500/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1501/// for \p RetVal.
1502///
1503/// \param Group - The OutlinableGroup containing the information about the
1504/// overall outlined function.
1505/// \param RetVal - The return value or exit option that we are currently
1506/// evaluating.
1507/// \returns The found or newly created BasicBlock to contain the needed
1508/// PHINodes to be used as outputs.
1510 // Find if a PHIBlock exists for this return value already. If it is
1511 // the first time we are analyzing this, we will not, so we record it.
1512 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(RetVal);
1513 if (!Inserted)
1514 return PhiBlockForRetVal->second;
1515
1516 auto ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1517 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1518 "Could not find output value!");
1519 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1520
1521 // If we did not find a block, we create one, and insert it into the
1522 // overall function and record it.
1523 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1524 ReturnBB->getParent());
1525 PhiBlockForRetVal->second = PHIBlock;
1526
1527 // We replace all branches to the return block in the newly created outlined
1528 // function to point to the new PHIBlock.
1529 ReturnBB->replaceAllUsesWith(PHIBlock);
1530
1531 UncondBrInst::Create(ReturnBB, PHIBlock);
1532
1533 return PhiBlockForRetVal->second;
1534}
1535
1536/// For the function call now representing the \p Region, find the passed value
1537/// to that call that represents Argument \p A at the call location if the
1538/// call has already been replaced with a call to the overall, aggregate
1539/// function.
1540///
1541/// \param A - The Argument to get the passed value for.
1542/// \param Region - The extracted Region corresponding to the outlined function.
1543/// \returns The Value representing \p A at the call site.
1544static Value *
1546 const OutlinableRegion &Region) {
1547 // If we don't need to adjust the argument number at all (since the call
1548 // has already been replaced by a call to the overall outlined function)
1549 // we can just get the specified argument.
1550 return Region.Call->getArgOperand(A->getArgNo());
1551}
1552
1553/// For the function call now representing the \p Region, find the passed value
1554/// to that call that represents Argument \p A at the call location if the
1555/// call has only been replaced by the call to the aggregate function.
1556///
1557/// \param A - The Argument to get the passed value for.
1558/// \param Region - The extracted Region corresponding to the outlined function.
1559/// \returns The Value representing \p A at the call site.
1560static Value *
1562 const OutlinableRegion &Region) {
1563 unsigned ArgNum = A->getArgNo();
1564
1565 // If it is a constant, we can look at our mapping from when we created
1566 // the outputs to figure out what the constant value is.
1567 if (auto It = Region.AggArgToConstant.find(ArgNum);
1568 It != Region.AggArgToConstant.end())
1569 return It->second;
1570
1571 // If it is not a constant, and we are not looking at the overall function, we
1572 // need to adjust which argument we are looking at.
1573 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1574 return Region.Call->getArgOperand(ArgNum);
1575}
1576
1577/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1578///
1579/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1580/// \param Region [in] - The OutlinableRegion containing \p PN.
1581/// \param OutputMappings [in] - The mapping of output values from outlined
1582/// region to their original values.
1583/// \param CanonNums [out] - The canonical numbering for the incoming values to
1584/// \p PN paired with their incoming block.
1585/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1586/// of \p Region rather than the overall function's call.
1589 const DenseMap<Value *, Value *> &OutputMappings,
1590 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1591 bool ReplacedWithOutlinedCall = true) {
1592 // Iterate over the incoming values.
1593 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1594 Value *IVal = PN->getIncomingValue(Idx);
1595 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1596 // If we have an argument as incoming value, we need to grab the passed
1597 // value from the call itself.
1598 if (Argument *A = dyn_cast<Argument>(IVal)) {
1599 if (ReplacedWithOutlinedCall)
1601 else
1603 }
1604
1605 // Get the original value if it has been replaced by an output value.
1606 IVal = findOutputMapping(OutputMappings, IVal);
1607
1608 // Find and add the canonical number for the incoming value.
1609 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1610 assert(GVN && "No GVN for incoming value");
1611 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1612 assert(CanonNum && "No Canonical Number for GVN");
1613 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1614 }
1615}
1616
1617/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1618/// in order to condense the number of instructions added to the outlined
1619/// function.
1620///
1621/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1622/// \param Region [in] - The OutlinableRegion containing \p PN.
1623/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1624/// \p PN in.
1625/// \param OutputMappings [in] - The mapping of output values from outlined
1626/// region to their original values.
1627/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1628/// matched.
1629/// \return the newly found or created PHINode in \p OverallPhiBlock.
1630static PHINode*
1632 BasicBlock *OverallPhiBlock,
1633 const DenseMap<Value *, Value *> &OutputMappings,
1634 DenseSet<PHINode *> &UsedPHIs) {
1635 OutlinableGroup &Group = *Region.Parent;
1636
1637
1638 // A list of the canonical numbering assigned to each incoming value, paired
1639 // with the incoming block for the PHINode passed into this function.
1641
1642 // We have to use the extracted function since we have merged this region into
1643 // the overall function yet. We make sure to reassign the argument numbering
1644 // since it is possible that the argument ordering is different between the
1645 // functions.
1646 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1647 /* ReplacedWithOutlinedCall = */ false);
1648
1649 OutlinableRegion *FirstRegion = Group.Regions[0];
1650
1651 // A list of the canonical numbering assigned to each incoming value, paired
1652 // with the incoming block for the PHINode that we are currently comparing
1653 // the passed PHINode to.
1655
1656 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1657 // the uses of the PHINode we are searching for, with the found PHINode.
1658 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1659 // If this PHINode has already been matched to another PHINode to be merged,
1660 // we skip it.
1661 if (UsedPHIs.contains(&CurrPN))
1662 continue;
1663
1664 CurrentCanonNums.clear();
1665 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1666 /* ReplacedWithOutlinedCall = */ true);
1667
1668 // If the list of incoming values is not the same length, then they cannot
1669 // match since there is not an analogue for each incoming value.
1670 if (PNCanonNums.size() != CurrentCanonNums.size())
1671 continue;
1672
1673 bool FoundMatch = true;
1674
1675 // We compare the canonical value for each incoming value in the passed
1676 // in PHINode to one already present in the outlined region. If the
1677 // incoming values do not match, then the PHINodes do not match.
1678
1679 // We also check to make sure that the incoming block matches as well by
1680 // finding the corresponding incoming block in the combined outlined region
1681 // for the current outlined region.
1682 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1683 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1684 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1685 if (ToCompareTo.first != ToAdd.first) {
1686 FoundMatch = false;
1687 break;
1688 }
1689
1690 BasicBlock *CorrespondingBlock =
1691 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1692 assert(CorrespondingBlock && "Found block is nullptr");
1693 if (CorrespondingBlock != ToCompareTo.second) {
1694 FoundMatch = false;
1695 break;
1696 }
1697 }
1698
1699 // If all incoming values and branches matched, then we can merge
1700 // into the found PHINode.
1701 if (FoundMatch) {
1702 UsedPHIs.insert(&CurrPN);
1703 return &CurrPN;
1704 }
1705 }
1706
1707 // If we've made it here, it means we weren't able to replace the PHINode, so
1708 // we must insert it ourselves.
1709 PHINode *NewPN = cast<PHINode>(PN.clone());
1710 NewPN->insertBefore(OverallPhiBlock->begin());
1711 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1712 Idx++) {
1713 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1714 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1715
1716 // Find corresponding basic block in the overall function for the incoming
1717 // block.
1718 BasicBlock *BlockToUse =
1719 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1720 NewPN->setIncomingBlock(Idx, BlockToUse);
1721
1722 // If we have an argument we make sure we replace using the argument from
1723 // the correct function.
1724 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1725 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1726 NewPN->setIncomingValue(Idx, Val);
1727 continue;
1728 }
1729
1730 // Find the corresponding value in the overall function.
1731 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1732 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1733 assert(Val && "Value is nullptr?");
1735 FirstRegion->RemappedArguments.find(Val);
1736 if (RemappedIt != FirstRegion->RemappedArguments.end())
1737 Val = RemappedIt->second;
1738 NewPN->setIncomingValue(Idx, Val);
1739 }
1740 return NewPN;
1741}
1742
1743// Within an extracted function, replace the argument uses of the extracted
1744// region with the arguments of the function for an OutlinableGroup.
1745//
1746/// \param [in] Region - The region of extracted code to be changed.
1747/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1748/// region.
1749/// \param [in] FirstFunction - A flag to indicate whether we are using this
1750/// function to define the overall outlined function for all the regions, or
1751/// if we are operating on one of the following regions.
1752static void
1755 const DenseMap<Value *, Value *> &OutputMappings,
1756 bool FirstFunction = false) {
1757 OutlinableGroup &Group = *Region.Parent;
1758 assert(Region.ExtractedFunction && "Region has no extracted function?");
1759
1760 Function *DominatingFunction = Region.ExtractedFunction;
1761 if (FirstFunction)
1762 DominatingFunction = Group.OutlinedFunction;
1763 DominatorTree DT(*DominatingFunction);
1764 DenseSet<PHINode *> UsedPHIs;
1765
1766 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1767 ArgIdx++) {
1768 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1769 "No mapping from extracted to outlined?");
1770 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1771 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1772 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1773 // The argument is an input, so we can simply replace it with the overall
1774 // argument value
1775 if (ArgIdx < Region.NumExtractedInputs) {
1776 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1777 << *Region.ExtractedFunction << " with " << *AggArg
1778 << " in function " << *Group.OutlinedFunction << "\n");
1779 Arg->replaceAllUsesWith(AggArg);
1780 Value *V = Region.Call->getArgOperand(ArgIdx);
1781 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1782 continue;
1783 }
1784
1785 // If we are replacing an output, we place the store value in its own
1786 // block inside the overall function before replacing the use of the output
1787 // in the function.
1788 assert(Arg->hasOneUse() && "Output argument can only have one use");
1789 User *InstAsUser = Arg->user_back();
1790 assert(InstAsUser && "User is nullptr!");
1791
1792 Instruction *I = cast<Instruction>(InstAsUser);
1793 BasicBlock *BB = I->getParent();
1794 SmallVector<BasicBlock *, 4> Descendants;
1795 DT.getDescendants(BB, Descendants);
1796 bool EdgeAdded = false;
1797 if (Descendants.size() == 0) {
1798 EdgeAdded = true;
1799 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1800 DT.getDescendants(BB, Descendants);
1801 }
1802
1803 // Iterate over the following blocks, looking for return instructions,
1804 // if we find one, find the corresponding output block for the return value
1805 // and move our store instruction there.
1806 for (BasicBlock *DescendBB : Descendants) {
1807 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1808 if (!RI)
1809 continue;
1810 Value *RetVal = RI->getReturnValue();
1811 auto VBBIt = OutputBBs.find(RetVal);
1812 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1813
1814 // If this is storing a PHINode, we must make sure it is included in the
1815 // overall function.
1817
1818 Value *ValueOperand = SI->getValueOperand();
1819
1820 StoreInst *NewI = cast<StoreInst>(I->clone());
1822 BasicBlock *OutputBB = VBBIt->second;
1823 NewI->insertInto(OutputBB, OutputBB->end());
1824 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1825 << *OutputBB << "\n");
1826
1827 // If this is storing a PHINode, we must make sure it is included in the
1828 // overall function.
1829 if (!isa<PHINode>(ValueOperand) ||
1830 Region.Candidate->getGVN(ValueOperand).has_value()) {
1831 if (FirstFunction)
1832 continue;
1833 Value *CorrVal =
1834 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1835 assert(CorrVal && "Value is nullptr?");
1836 NewI->setOperand(0, CorrVal);
1837 continue;
1838 }
1839 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1840 // If it has a value, it was not split by the code extractor, which
1841 // is what we are looking for.
1842 if (Region.Candidate->getGVN(PN))
1843 continue;
1844
1845 // We record the parent block for the PHINode in the Region so that
1846 // we can exclude it from checks later on.
1847 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1848
1849 // If this is the first function, we do not need to worry about mergiing
1850 // this with any other block in the overall outlined function, so we can
1851 // just continue.
1852 if (FirstFunction) {
1853 BasicBlock *PHIBlock = PN->getParent();
1854 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1855 continue;
1856 }
1857
1858 // We look for the aggregate block that contains the PHINodes leading into
1859 // this exit path. If we can't find one, we create one.
1860 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1861
1862 // For our PHINode, we find the combined canonical numbering, and
1863 // attempt to find a matching PHINode in the overall PHIBlock. If we
1864 // cannot, we copy the PHINode and move it into this new block.
1865 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1866 OutputMappings, UsedPHIs);
1867 NewI->setOperand(0, NewPN);
1868 }
1869
1870 // If we added an edge for basic blocks without a predecessor, we remove it
1871 // here.
1872 if (EdgeAdded)
1873 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1874 I->eraseFromParent();
1875
1876 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1877 << *Region.ExtractedFunction << " with " << *AggArg
1878 << " in function " << *Group.OutlinedFunction << "\n");
1879 Arg->replaceAllUsesWith(AggArg);
1880 }
1881}
1882
1883/// Within an extracted function, replace the constants that need to be lifted
1884/// into arguments with the actual argument.
1885///
1886/// \param Region [in] - The region of extracted code to be changed.
1888 OutlinableGroup &Group = *Region.Parent;
1889 Function *OutlinedFunction = Group.OutlinedFunction;
1890 ValueToValueMapTy VMap;
1891
1892 // Iterate over the constants that need to be elevated into arguments
1893 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1894 unsigned AggArgIdx = Const.first;
1895 assert(OutlinedFunction && "Overall Function is not defined?");
1896 Constant *CST = Const.second;
1897 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1898 // Identify the argument it will be elevated to, and replace instances of
1899 // that constant in the function.
1900 VMap[CST] = Arg;
1901 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1902 << " in function " << *OutlinedFunction << " with "
1903 << *Arg << '\n');
1904 }
1905
1906 RemapFunction(*OutlinedFunction, VMap,
1908}
1909
1910/// It is possible that there is a basic block that already performs the same
1911/// stores. This returns a duplicate block, if it exists
1912///
1913/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1914/// \param OutputStoreBBs [in] The existing output blocks.
1915/// \returns an optional value with the number output block if there is a match.
1916std::optional<unsigned> findDuplicateOutputBlock(
1918 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1919
1920 bool Mismatch = false;
1921 unsigned MatchingNum = 0;
1922 // We compare the new set output blocks to the other sets of output blocks.
1923 // If they are the same number, and have identical instructions, they are
1924 // considered to be the same.
1925 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1926 Mismatch = false;
1927 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1929 OutputBBs.find(VToB.first);
1930 if (OutputBBIt == OutputBBs.end()) {
1931 Mismatch = true;
1932 break;
1933 }
1934
1935 BasicBlock *CompBB = VToB.second;
1936 BasicBlock *OutputBB = OutputBBIt->second;
1937 if (CompBB->size() - 1 != OutputBB->size()) {
1938 Mismatch = true;
1939 break;
1940 }
1941
1942 BasicBlock::iterator NIt = OutputBB->begin();
1943 for (Instruction &I : *CompBB) {
1945 continue;
1946
1947 if (!I.isIdenticalTo(&(*NIt))) {
1948 Mismatch = true;
1949 break;
1950 }
1951
1952 NIt++;
1953 }
1954 }
1955
1956 if (!Mismatch)
1957 return MatchingNum;
1958
1959 MatchingNum++;
1960 }
1961
1962 return std::nullopt;
1963}
1964
1965/// Remove empty output blocks from the outlined region.
1966///
1967/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1968/// Region.
1969/// \param Region - The OutlinableRegion we are analyzing.
1970static bool
1973 bool AllRemoved = true;
1974 Value *RetValueForBB;
1975 BasicBlock *NewBB;
1977 // Iterate over the output blocks created in the outlined section.
1978 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1979 RetValueForBB = VtoBB.first;
1980 NewBB = VtoBB.second;
1981
1982 // If there are no instructions, we remove it from the module, and also
1983 // mark the value for removal from the return value to output block mapping.
1984 if (NewBB->size() == 0) {
1985 NewBB->eraseFromParent();
1986 ToRemove.push_back(RetValueForBB);
1987 continue;
1988 }
1989
1990 // Mark that we could not remove all the blocks since they were not all
1991 // empty.
1992 AllRemoved = false;
1993 }
1994
1995 // Remove the return value from the mapping.
1996 for (Value *V : ToRemove)
1997 BlocksToPrune.erase(V);
1998
1999 // Mark the region as having the no output scheme.
2000 if (AllRemoved)
2001 Region.OutputBlockNum = -1;
2002
2003 return AllRemoved;
2004}
2005
2006/// For the outlined section, move needed the StoreInsts for the output
2007/// registers into their own block. Then, determine if there is a duplicate
2008/// output block already created.
2009///
2010/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2011/// \param [in] Region - The OutlinableRegion that is being analyzed.
2012/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2013/// placed in.
2014/// \param [in] EndBBs - the final blocks of the extracted function.
2015/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2016/// been replaced by a new output value.
2017/// \param [in,out] OutputStoreBBs - The existing output blocks.
2022 const DenseMap<Value *, Value *> &OutputMappings,
2023 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2024 // If none of the output blocks have any instructions, this means that we do
2025 // not have to determine if it matches any of the other output schemes, and we
2026 // don't have to do anything else.
2027 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2028 return;
2029
2030 // Determine is there is a duplicate set of blocks.
2031 std::optional<unsigned> MatchingBB =
2032 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2033
2034 // If there is, we remove the new output blocks. If it does not,
2035 // we add it to our list of sets of output blocks.
2036 if (MatchingBB) {
2037 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2038 << Region.ExtractedFunction << " to " << *MatchingBB);
2039
2040 Region.OutputBlockNum = *MatchingBB;
2041 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2042 VtoBB.second->eraseFromParent();
2043 return;
2044 }
2045
2046 Region.OutputBlockNum = OutputStoreBBs.size();
2047
2048 Value *RetValueForBB;
2049 BasicBlock *NewBB;
2050 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2051 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2052 RetValueForBB = VtoBB.first;
2053 NewBB = VtoBB.second;
2055 EndBBs.find(RetValueForBB);
2056 LLVM_DEBUG(dbgs() << "Create output block for region in"
2057 << Region.ExtractedFunction << " to "
2058 << *NewBB);
2059 UncondBrInst::Create(VBBIt->second, NewBB);
2060 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2061 }
2062}
2063
2064/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2065/// before creating a basic block for each \p NewMap, and inserting into the new
2066/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2067///
2068/// \param OldMap [in] - The mapping to base the new mapping off of.
2069/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2070/// \param ParentFunc [in] - The function to put the new basic block in.
2071/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2072/// by an index value.
2075 Function *ParentFunc, Twine BaseName) {
2076 unsigned Idx = 0;
2077 std::vector<Value *> SortedKeys;
2078
2079 getSortedConstantKeys(SortedKeys, OldMap);
2080
2081 for (Value *RetVal : SortedKeys) {
2083 ParentFunc->getContext(), Twine(BaseName) + Twine("_") + Twine(Idx++),
2084 ParentFunc);
2085 NewMap.insert(std::make_pair(RetVal, NewBB));
2086 }
2087}
2088
2089/// Create the switch statement for outlined function to differentiate between
2090/// all the output blocks.
2091///
2092/// For the outlined section, determine if an outlined block already exists that
2093/// matches the needed stores for the extracted section.
2094/// \param [in] M - The module we are outlining from.
2095/// \param [in] OG - The group of regions to be outlined.
2096/// \param [in] EndBBs - The final blocks of the extracted function.
2097/// \param [in,out] OutputStoreBBs - The existing output blocks.
2100 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2101 // We only need the switch statement if there is more than one store
2102 // combination, or there is more than one set of output blocks. The first
2103 // will occur when we store different sets of values for two different
2104 // regions. The second will occur when we have two outputs that are combined
2105 // in a PHINode outside of the region in one outlined instance, and are used
2106 // seaparately in another. This will create the same set of OutputGVNs, but
2107 // will generate two different output schemes.
2108 if (OG.OutputGVNCombinations.size() > 1) {
2109 Function *AggFunc = OG.OutlinedFunction;
2110 // Create a final block for each different return block.
2112 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2113
2114 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2115 std::pair<Value *, BasicBlock *> &OutputBlock =
2116 *OG.EndBBs.find(RetBlockPair.first);
2117 BasicBlock *ReturnBlock = RetBlockPair.second;
2118 BasicBlock *EndBB = OutputBlock.second;
2119 Instruction *Term = EndBB->getTerminator();
2120 // Move the return value to the final block instead of the original exit
2121 // stub.
2122 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2123 // Put the switch statement in the old end basic block for the function
2124 // with a fall through to the new return block.
2125 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2126 << OutputStoreBBs.size() << "\n");
2127 SwitchInst *SwitchI =
2128 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2129 ReturnBlock, OutputStoreBBs.size(), EndBB);
2130
2131 unsigned Idx = 0;
2132 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2134 OutputStoreBB.find(OutputBlock.first);
2135
2136 if (OSBBIt == OutputStoreBB.end())
2137 continue;
2138
2139 BasicBlock *BB = OSBBIt->second;
2140 SwitchI->addCase(
2141 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2142 Term = BB->getTerminator();
2143 Term->setSuccessor(0, ReturnBlock);
2144 Idx++;
2145 }
2146 }
2147 return;
2148 }
2149
2150 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2151
2152 // If there needs to be stores, move them from the output blocks to their
2153 // corresponding ending block. We do not check that the OutputGVNCombinations
2154 // is equal to 1 here since that could just been the case where there are 0
2155 // outputs. Instead, we check whether there is more than one set of output
2156 // blocks since this is the only case where we would have to move the
2157 // stores, and erase the extraneous blocks.
2158 if (OutputStoreBBs.size() == 1) {
2159 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2160 << *OG.OutlinedFunction << "\n");
2161 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2162 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2164 EndBBs.find(VBPair.first);
2165 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2166 BasicBlock *EndBB = EndBBIt->second;
2167 BasicBlock *OutputBB = VBPair.second;
2168 Instruction *Term = OutputBB->getTerminator();
2169 Term->eraseFromParent();
2170 Term = EndBB->getTerminator();
2171 moveBBContents(*OutputBB, *EndBB);
2172 Term->moveBefore(*EndBB, EndBB->end());
2173 OutputBB->eraseFromParent();
2174 }
2175 }
2176}
2177
2178void IROutliner::fillOverallFunction(
2179 Module &M, OutlinableGroup &CurrentGroup,
2180 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2181 std::vector<Function *> &FuncsToRemove) {
2182 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2183
2184 TargetTransformInfo &TTI = getTTI(*CurrentOS->StartBB->getParent());
2185
2186 // Move first extracted function's instructions into new function.
2187 LLVM_DEBUG(dbgs() << "Move instructions from "
2188 << *CurrentOS->ExtractedFunction << " to instruction "
2189 << *CurrentGroup.OutlinedFunction << "\n");
2191 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2192
2193 // Transfer the attributes from the function to the new function.
2194 for (Attribute A :
2195 CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) {
2197 A))
2198 continue;
2199
2200 CurrentGroup.OutlinedFunction->addFnAttr(A);
2201 }
2202
2203 // Create a new set of output blocks for the first extracted function.
2204 DenseMap<Value *, BasicBlock *> NewBBs;
2205 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2206 CurrentGroup.OutlinedFunction, "output_block_0");
2207 CurrentOS->OutputBlockNum = 0;
2208
2209 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2210 replaceConstants(*CurrentOS);
2211
2212 // We first identify if any output blocks are empty, if they are we remove
2213 // them. We then create a branch instruction to the basic block to the return
2214 // block for the function for each non empty output block.
2215 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2216 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2217 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2219 CurrentGroup.EndBBs.find(VToBB.first);
2220 BasicBlock *EndBB = VBBIt->second;
2221 UncondBrInst::Create(EndBB, VToBB.second);
2222 OutputStoreBBs.back().insert(VToBB);
2223 }
2224 }
2225
2226 // Replace the call to the extracted function with the outlined function.
2227 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2228
2229 // We only delete the extracted functions at the end since we may need to
2230 // reference instructions contained in them for mapping purposes.
2231 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2232}
2233
2234void IROutliner::deduplicateExtractedSections(
2235 Module &M, OutlinableGroup &CurrentGroup,
2236 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2237 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2238
2239 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2240
2241 OutlinableRegion *CurrentOS;
2242
2243 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
2244
2245 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2246 CurrentOS = CurrentGroup.Regions[Idx];
2247 AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
2248 *CurrentOS->ExtractedFunction);
2249
2250 // Create a set of BasicBlocks, one for each return block, to hold the
2251 // needed store instructions.
2252 DenseMap<Value *, BasicBlock *> NewBBs;
2253 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2254 CurrentGroup.OutlinedFunction,
2255 "output_block_" + Twine(Idx));
2256 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2257 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2258 CurrentGroup.EndBBs, OutputMappings,
2259 OutputStoreBBs);
2260
2261 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2262 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2263 }
2264
2265 // Create a switch statement to handle the different output schemes.
2266 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2267
2268 OutlinedFunctionNum++;
2269}
2270
2271/// Checks that the next instruction in the InstructionDataList matches the
2272/// next instruction in the module. If they do not, there could be the
2273/// possibility that extra code has been inserted, and we must ignore it.
2274///
2275/// \param ID - The IRInstructionData to check the next instruction of.
2276/// \returns true if the InstructionDataList and actual instruction match.
2278 // We check if there is a discrepancy between the InstructionDataList
2279 // and the actual next instruction in the module. If there is, it means
2280 // that an extra instruction was added, likely by the CodeExtractor.
2281
2282 // Since we do not have any similarity data about this particular
2283 // instruction, we cannot confidently outline it, and must discard this
2284 // candidate.
2285 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2286 Instruction *NextIDLInst = NextIDIt->Inst;
2287 Instruction *NextModuleInst = nullptr;
2288 if (!ID.Inst->isTerminator())
2289 NextModuleInst = ID.Inst->getNextNode();
2290 else if (NextIDLInst != nullptr)
2291 NextModuleInst = &*NextIDIt->Inst->getParent()->begin();
2292
2293 if (NextIDLInst && NextIDLInst != NextModuleInst)
2294 return false;
2295
2296 return true;
2297}
2298
2299bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2300 const OutlinableRegion &Region) {
2301 IRSimilarityCandidate *IRSC = Region.Candidate;
2302 unsigned StartIdx = IRSC->getStartIdx();
2303 unsigned EndIdx = IRSC->getEndIdx();
2304
2305 // A check to make sure that we are not about to attempt to outline something
2306 // that has already been outlined.
2307 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2308 if (Outlined.contains(Idx))
2309 return false;
2310
2311 // We check if the recorded instruction matches the actual next instruction,
2312 // if it does not, we fix it in the InstructionDataList.
2313 if (!Region.Candidate->backInstruction()->isTerminator()) {
2314 Instruction *NewEndInst =
2315 Region.Candidate->backInstruction()->getNextNode();
2316 assert(NewEndInst && "Next instruction is a nullptr?");
2317 if (Region.Candidate->end()->Inst != NewEndInst) {
2318 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2319 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2320 IRInstructionData(*NewEndInst,
2321 InstructionClassifier.visit(*NewEndInst), *IDL);
2322
2323 // Insert the first IRInstructionData of the new region after the
2324 // last IRInstructionData of the IRSimilarityCandidate.
2325 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2326 }
2327 }
2328
2329 return none_of(*IRSC, [this](IRInstructionData &ID) {
2331 return true;
2332
2333 return !this->InstructionClassifier.visit(ID.Inst);
2334 });
2335}
2336
2337void IROutliner::pruneIncompatibleRegions(
2338 std::vector<IRSimilarityCandidate> &CandidateVec,
2339 OutlinableGroup &CurrentGroup) {
2340 bool PreviouslyOutlined;
2341
2342 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2343 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2344 const IRSimilarityCandidate &RHS) {
2345 return LHS.getStartIdx() < RHS.getStartIdx();
2346 });
2347
2348 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2349 // Since outlining a call and a branch instruction will be the same as only
2350 // outlinining a call instruction, we ignore it as a space saving.
2351 if (FirstCandidate.getLength() == 2) {
2352 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2353 isa<UncondBrInst, CondBrInst>(FirstCandidate.back()->Inst))
2354 return;
2355 }
2356
2357 unsigned CurrentEndIdx = 0;
2358 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2359 PreviouslyOutlined = false;
2360 unsigned StartIdx = IRSC.getStartIdx();
2361 unsigned EndIdx = IRSC.getEndIdx();
2362 const Function &FnForCurrCand = *IRSC.getFunction();
2363
2364 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2365 if (Outlined.contains(Idx)) {
2366 PreviouslyOutlined = true;
2367 break;
2368 }
2369
2370 if (PreviouslyOutlined)
2371 continue;
2372
2373 // Check over the instructions, and if the basic block has its address
2374 // taken for use somewhere else, we do not outline that block.
2375 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2376 return ID.Inst->getParent()->hasAddressTaken();
2377 });
2378
2379 if (BBHasAddressTaken)
2380 continue;
2381
2382 if (FnForCurrCand.hasOptNone())
2383 continue;
2384
2385 if (FnForCurrCand.hasFnAttribute(Attribute::NoOutline)) {
2386 LLVM_DEBUG({
2387 dbgs() << "... Skipping function with nooutline attribute: "
2388 << FnForCurrCand.getName() << "\n";
2389 });
2390 continue;
2391 }
2392
2393 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2394 !OutlineFromLinkODRs)
2395 continue;
2396
2397 // Greedily prune out any regions that will overlap with already chosen
2398 // regions.
2399 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2400 continue;
2401
2402 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2404 return true;
2405
2406 return !this->InstructionClassifier.visit(ID.Inst);
2407 });
2408
2409 if (BadInst)
2410 continue;
2411
2412 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2413 OutlinableRegion(IRSC, CurrentGroup);
2414 CurrentGroup.Regions.push_back(OS);
2415
2416 CurrentEndIdx = EndIdx;
2417 }
2418}
2419
2421IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2422 InstructionCost RegionBenefit = 0;
2423 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2424 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2425 // We add the number of instructions in the region to the benefit as an
2426 // estimate as to how much will be removed.
2427 RegionBenefit += Region->getBenefit(TTI);
2428 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2429 << " saved instructions to overfall benefit.\n");
2430 }
2431
2432 return RegionBenefit;
2433}
2434
2435/// For the \p OutputCanon number passed in find the value represented by this
2436/// canonical number. If it is from a PHINode, we pick the first incoming
2437/// value and return that Value instead.
2438///
2439/// \param Region - The OutlinableRegion to get the Value from.
2440/// \param OutputCanon - The canonical number to find the Value from.
2441/// \returns The Value represented by a canonical number \p OutputCanon in \p
2442/// Region.
2444 unsigned OutputCanon) {
2445 OutlinableGroup &CurrentGroup = *Region.Parent;
2446 // If the value is greater than the value in the tracker, we have a
2447 // PHINode and will instead use one of the incoming values to find the
2448 // type.
2449 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2450 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2451 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2452 "Could not find GVN set for PHINode number!");
2453 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2454 OutputCanon = *It->second.second.begin();
2455 }
2456 std::optional<unsigned> OGVN =
2457 Region.Candidate->fromCanonicalNum(OutputCanon);
2458 assert(OGVN && "Could not find GVN for Canonical Number?");
2459 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2460 assert(OV && "Could not find value for GVN?");
2461 return *OV;
2462}
2463
2465IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2466 InstructionCost OverallCost = 0;
2467 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2468 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2469
2470 // Each output incurs a load after the call, so we add that to the cost.
2471 for (unsigned OutputCanon : Region->GVNStores) {
2472 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2473 InstructionCost LoadCost =
2474 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2476
2477 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2478 << " instructions to cost for output of type "
2479 << *V->getType() << "\n");
2480 OverallCost += LoadCost;
2481 }
2482 }
2483
2484 return OverallCost;
2485}
2486
2487/// Find the extra instructions needed to handle any output values for the
2488/// region.
2489///
2490/// \param [in] M - The Module to outline from.
2491/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2492/// \param [in] TTI - The TargetTransformInfo used to collect information for
2493/// new instruction costs.
2494/// \returns the additional cost to handle the outputs.
2496 OutlinableGroup &CurrentGroup,
2498 InstructionCost OutputCost = 0;
2499 unsigned NumOutputBranches = 0;
2500
2501 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2502 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2503 DenseSet<BasicBlock *> CandidateBlocks;
2504 Candidate.getBasicBlocks(CandidateBlocks);
2505
2506 // Count the number of different output branches that point to blocks outside
2507 // of the region.
2508 DenseSet<BasicBlock *> FoundBlocks;
2509 for (IRInstructionData &ID : Candidate) {
2511 continue;
2512
2513 for (Value *V : ID.OperVals) {
2514 BasicBlock *BB = static_cast<BasicBlock *>(V);
2515 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2516 NumOutputBranches++;
2517 }
2518 }
2519
2520 CurrentGroup.BranchesToOutside = NumOutputBranches;
2521
2522 for (const ArrayRef<unsigned> &OutputUse :
2523 CurrentGroup.OutputGVNCombinations) {
2524 for (unsigned OutputCanon : OutputUse) {
2525 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2526 InstructionCost StoreCost =
2527 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2529
2530 // An instruction cost is added for each store set that needs to occur for
2531 // various output combinations inside the function, plus a branch to
2532 // return to the exit block.
2533 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2534 << " instructions to cost for output of type "
2535 << *V->getType() << "\n");
2536 OutputCost += StoreCost * NumOutputBranches;
2537 }
2538
2539 InstructionCost BranchCost = TTI.getCFInstrCost(
2540 Instruction::UncondBr, TargetTransformInfo::TCK_CodeSize);
2541 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2542 << " a branch instruction\n");
2543 OutputCost += BranchCost * NumOutputBranches;
2544 }
2545
2546 // If there is more than one output scheme, we must have a comparison and
2547 // branch for each different item in the switch statement.
2548 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2549 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2550 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2553 InstructionCost BranchCost = TTI.getCFInstrCost(
2554 Instruction::CondBr, TargetTransformInfo::TCK_CodeSize);
2555
2556 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2557 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2558
2559 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2560 << " instructions for each switch case for each different"
2561 << " output path in a function\n");
2562 OutputCost += TotalCost * NumOutputBranches;
2563 }
2564
2565 return OutputCost;
2566}
2567
2568void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2569 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2570 CurrentGroup.Benefit += RegionBenefit;
2571 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2572
2573 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2574 CurrentGroup.Cost += OutputReloadCost;
2575 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2576
2577 InstructionCost AverageRegionBenefit =
2578 RegionBenefit / CurrentGroup.Regions.size();
2579 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2580 unsigned NumRegions = CurrentGroup.Regions.size();
2581 TargetTransformInfo &TTI =
2582 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2583
2584 // We add one region to the cost once, to account for the instructions added
2585 // inside of the newly created function.
2586 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2587 << " instructions to cost for body of new function.\n");
2588 CurrentGroup.Cost += AverageRegionBenefit;
2589 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2590
2591 // For each argument, we must add an instruction for loading the argument
2592 // out of the register and into a value inside of the newly outlined function.
2593 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2594 << " instructions to cost for each argument in the new"
2595 << " function.\n");
2596 CurrentGroup.Cost +=
2597 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2598 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2599
2600 // Each argument needs to either be loaded into a register or onto the stack.
2601 // Some arguments will only be loaded into the stack once the argument
2602 // registers are filled.
2603 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2604 << " instructions to cost for each argument in the new"
2605 << " function " << NumRegions << " times for the "
2606 << "needed argument handling at the call site.\n");
2607 CurrentGroup.Cost +=
2608 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2609 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2610
2611 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2612 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2613}
2614
2615void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2616 ArrayRef<Value *> Outputs,
2617 LoadInst *LI) {
2618 // For and load instructions following the call
2619 Value *Operand = LI->getPointerOperand();
2620 std::optional<unsigned> OutputIdx;
2621 // Find if the operand it is an output register.
2622 for (unsigned ArgIdx = Region.NumExtractedInputs;
2623 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2624 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2625 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2626 break;
2627 }
2628 }
2629
2630 // If we found an output register, place a mapping of the new value
2631 // to the original in the mapping.
2632 if (!OutputIdx)
2633 return;
2634
2635 auto It = OutputMappings.find(Outputs[*OutputIdx]);
2636 if (It == OutputMappings.end()) {
2637 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2638 << *Outputs[*OutputIdx] << "\n");
2639 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2640 } else {
2641 Value *Orig = It->second;
2642 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2643 << *Outputs[*OutputIdx] << "\n");
2644 OutputMappings.insert(std::make_pair(LI, Orig));
2645 }
2646}
2647
2648bool IROutliner::extractSection(OutlinableRegion &Region) {
2649 SetVector<Value *> ArgInputs, Outputs;
2650 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2651 BasicBlock *InitialStart = Region.StartBB;
2652 Function *OrigF = Region.StartBB->getParent();
2653 CodeExtractorAnalysisCache CEAC(*OrigF);
2654 Region.ExtractedFunction =
2655 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2656
2657 // If the extraction was successful, find the BasicBlock, and reassign the
2658 // OutlinableRegion blocks
2659 if (!Region.ExtractedFunction) {
2660 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2661 << "\n");
2662 Region.reattachCandidate();
2663 return false;
2664 }
2665
2666 // Get the block containing the called branch, and reassign the blocks as
2667 // necessary. If the original block still exists, it is because we ended on
2668 // a branch instruction, and so we move the contents into the block before
2669 // and assign the previous block correctly.
2670 User *InstAsUser = Region.ExtractedFunction->user_back();
2671 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2672 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2673 assert(Region.PrevBB && "PrevBB is nullptr?");
2674 if (Region.PrevBB == InitialStart) {
2675 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2676 Instruction *BI = NewPrev->getTerminator();
2677 BI->eraseFromParent();
2678 moveBBContents(*InitialStart, *NewPrev);
2679 Region.PrevBB = NewPrev;
2680 InitialStart->eraseFromParent();
2681 }
2682
2683 Region.StartBB = RewrittenBB;
2684 Region.EndBB = RewrittenBB;
2685
2686 // The sequences of outlinable regions has now changed. We must fix the
2687 // IRInstructionDataList for consistency. Although they may not be illegal
2688 // instructions, they should not be compared with anything else as they
2689 // should not be outlined in this round. So marking these as illegal is
2690 // allowed.
2691 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2692 Instruction *BeginRewritten = &*RewrittenBB->begin();
2693 Instruction *EndRewritten = &*RewrittenBB->begin();
2694 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2695 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2696 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2697 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2698
2699 // Insert the first IRInstructionData of the new region in front of the
2700 // first IRInstructionData of the IRSimilarityCandidate.
2701 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2702 // Insert the first IRInstructionData of the new region after the
2703 // last IRInstructionData of the IRSimilarityCandidate.
2704 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2705 // Remove the IRInstructionData from the IRSimilarityCandidate.
2706 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2707
2708 assert(RewrittenBB != nullptr &&
2709 "Could not find a predecessor after extraction!");
2710
2711 // Iterate over the new set of instructions to find the new call
2712 // instruction.
2713 for (Instruction &I : *RewrittenBB)
2714 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2715 if (Region.ExtractedFunction == CI->getCalledFunction())
2716 Region.Call = CI;
2717 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2718 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2719 Region.reattachCandidate();
2720 return true;
2721}
2722
2723unsigned IROutliner::doOutline(Module &M) {
2724 // Find the possible similarity sections.
2725 InstructionClassifier.EnableBranches = !DisableBranches;
2726 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2727 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2728
2729 IRSimilarityIdentifier &Identifier = getIRSI(M);
2730 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2731
2732 // Sort them by size of extracted sections
2733 unsigned OutlinedFunctionNum = 0;
2734 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2735 // to sort them by the potential number of instructions to be outlined
2736 if (SimilarityCandidates.size() > 1)
2737 llvm::stable_sort(SimilarityCandidates,
2738 [](const std::vector<IRSimilarityCandidate> &LHS,
2739 const std::vector<IRSimilarityCandidate> &RHS) {
2740 return LHS[0].getLength() * LHS.size() >
2741 RHS[0].getLength() * RHS.size();
2742 });
2743 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2744 // each of the following for loops to avoid making an allocator.
2745 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2746
2747 DenseSet<unsigned> NotSame;
2748 std::vector<OutlinableGroup *> NegativeCostGroups;
2749 std::vector<OutlinableRegion *> OutlinedRegions;
2750 // Iterate over the possible sets of similarity.
2751 unsigned PotentialGroupIdx = 0;
2752 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2753 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2754
2755 // Remove entries that were previously outlined
2756 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2757
2758 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2759 // trying to outlined since there is no compatible similar instance of this
2760 // code.
2761 if (CurrentGroup.Regions.size() < 2)
2762 continue;
2763
2764 // Determine if there are any values that are the same constant throughout
2765 // each section in the set.
2766 NotSame.clear();
2767 CurrentGroup.findSameConstants(NotSame);
2768
2769 if (CurrentGroup.IgnoreGroup)
2770 continue;
2771
2772 // Create a CodeExtractor for each outlinable region. Identify inputs and
2773 // outputs for each section using the code extractor and create the argument
2774 // types for the Aggregate Outlining Function.
2775 OutlinedRegions.clear();
2776 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2777 // Break the outlinable region out of its parent BasicBlock into its own
2778 // BasicBlocks (see function implementation).
2779 OS->splitCandidate();
2780
2781 // There's a chance that when the region is split, extra instructions are
2782 // added to the region. This makes the region no longer viable
2783 // to be split, so we ignore it for outlining.
2784 if (!OS->CandidateSplit)
2785 continue;
2786
2788 DenseSet<BasicBlock *> BlocksInRegion;
2789 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2790 OS->CE = new (ExtractorAllocator.Allocate())
2791 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2792 false, nullptr, "outlined");
2793 findAddInputsOutputs(M, *OS, NotSame);
2794 if (!OS->IgnoreRegion)
2795 OutlinedRegions.push_back(OS);
2796
2797 // We recombine the blocks together now that we have gathered all the
2798 // needed information.
2799 OS->reattachCandidate();
2800 }
2801
2802 CurrentGroup.Regions = std::move(OutlinedRegions);
2803
2804 if (CurrentGroup.Regions.empty())
2805 continue;
2806
2807 CurrentGroup.collectGVNStoreSets(M);
2808
2809 if (CostModel)
2810 findCostBenefit(M, CurrentGroup);
2811
2812 // If we are adhering to the cost model, skip those groups where the cost
2813 // outweighs the benefits.
2814 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2815 OptimizationRemarkEmitter &ORE =
2816 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2817 ORE.emit([&]() {
2818 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2819 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2820 C->frontInstruction());
2821 R << "did not outline "
2822 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2823 << " regions due to estimated increase of "
2824 << ore::NV("InstructionIncrease",
2825 CurrentGroup.Cost - CurrentGroup.Benefit)
2826 << " instructions at locations ";
2827 interleave(
2828 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2829 [&R](OutlinableRegion *Region) {
2830 R << ore::NV(
2831 "DebugLoc",
2832 Region->Candidate->frontInstruction()->getDebugLoc());
2833 },
2834 [&R]() { R << " "; });
2835 return R;
2836 });
2837 continue;
2838 }
2839
2840 NegativeCostGroups.push_back(&CurrentGroup);
2841 }
2842
2843 ExtractorAllocator.DestroyAll();
2844
2845 if (NegativeCostGroups.size() > 1)
2846 stable_sort(NegativeCostGroups,
2847 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2848 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2849 });
2850
2851 std::vector<Function *> FuncsToRemove;
2852 for (OutlinableGroup *CG : NegativeCostGroups) {
2853 OutlinableGroup &CurrentGroup = *CG;
2854
2855 OutlinedRegions.clear();
2856 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2857 // We check whether our region is compatible with what has already been
2858 // outlined, and whether we need to ignore this item.
2859 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2860 continue;
2861 OutlinedRegions.push_back(Region);
2862 }
2863
2864 if (OutlinedRegions.size() < 2)
2865 continue;
2866
2867 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2868 // we are still outlining enough regions to make up for the added cost.
2869 CurrentGroup.Regions = std::move(OutlinedRegions);
2870 if (CostModel) {
2871 CurrentGroup.Benefit = 0;
2872 CurrentGroup.Cost = 0;
2873 findCostBenefit(M, CurrentGroup);
2874 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2875 continue;
2876 }
2877 OutlinedRegions.clear();
2878 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2879 Region->splitCandidate();
2880 if (!Region->CandidateSplit)
2881 continue;
2882 OutlinedRegions.push_back(Region);
2883 }
2884
2885 CurrentGroup.Regions = std::move(OutlinedRegions);
2886 if (CurrentGroup.Regions.size() < 2) {
2887 for (OutlinableRegion *R : CurrentGroup.Regions)
2888 R->reattachCandidate();
2889 continue;
2890 }
2891
2892 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2893 << " and benefit " << CurrentGroup.Benefit << "\n");
2894
2895 // Create functions out of all the sections, and mark them as outlined.
2896 OutlinedRegions.clear();
2897 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2899 DenseSet<BasicBlock *> BlocksInRegion;
2900 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2901 OS->CE = new (ExtractorAllocator.Allocate())
2902 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2903 false, nullptr, "outlined");
2904 bool FunctionOutlined = extractSection(*OS);
2905 if (FunctionOutlined) {
2906 unsigned StartIdx = OS->Candidate->getStartIdx();
2907 unsigned EndIdx = OS->Candidate->getEndIdx();
2908 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2909 Outlined.insert(Idx);
2910
2911 OutlinedRegions.push_back(OS);
2912 }
2913 }
2914
2915 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2916 << " with benefit " << CurrentGroup.Benefit
2917 << " and cost " << CurrentGroup.Cost << "\n");
2918
2919 CurrentGroup.Regions = std::move(OutlinedRegions);
2920
2921 if (CurrentGroup.Regions.empty())
2922 continue;
2923
2924 OptimizationRemarkEmitter &ORE =
2925 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2926 ORE.emit([&]() {
2927 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2928 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2929 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2930 << " regions with decrease of "
2931 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2932 << " instructions at locations ";
2933 interleave(
2934 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2935 [&R](OutlinableRegion *Region) {
2936 R << ore::NV("DebugLoc",
2937 Region->Candidate->frontInstruction()->getDebugLoc());
2938 },
2939 [&R]() { R << " "; });
2940 return R;
2941 });
2942
2943 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2944 OutlinedFunctionNum);
2945 }
2946
2947 for (Function *F : FuncsToRemove)
2948 F->eraseFromParent();
2949
2950 return OutlinedFunctionNum;
2951}
2952
2954 CostModel = !NoCostModel;
2955 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2956
2957 return doOutline(M) > 0;
2958}
2959
2961 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2962
2963 std::function<TargetTransformInfo &(Function &)> GTTI =
2964 [&FAM](Function &F) -> TargetTransformInfo & {
2965 return FAM.getResult<TargetIRAnalysis>(F);
2966 };
2967
2968 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2969 [&AM](Module &M) -> IRSimilarityIdentifier & {
2970 return AM.getResult<IRSimilarityAnalysis>(M);
2971 };
2972
2973 std::unique_ptr<OptimizationRemarkEmitter> ORE;
2974 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2975 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2976 ORE.reset(new OptimizationRemarkEmitter(&F));
2977 return *ORE;
2978 };
2979
2980 if (IROutliner(GTTI, GIRSI, GORE).run(M))
2981 return PreservedAnalyses::none();
2982 return PreservedAnalyses::all();
2983}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
SmallVector< unsigned, 2 > CanonList
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
This header defines various interfaces for pass management in LLVM.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:470
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:647
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Utility class for extracting code into a new function.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
Definition Constant.h:43
Subprogram description. Uses SubclassData1.
static DebugLoc getDropped()
Definition DebugLoc.h:163
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Class to represent function types.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition Function.cpp:638
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
const BasicBlock & getEntryBlock() const
Definition Function.h:809
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
const BasicBlock & back() const
Definition Function.h:862
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
size_t size() const
Definition Function.h:858
bool hasOptNone() const
Do not optimize this function (-O0).
Definition Function.h:708
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition Function.cpp:666
size_t arg_size() const
Definition Function.h:901
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool hasLinkOnceODRLinkage() const
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
An instruction for reading from memory.
Value * getPointerOperand()
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
LLVM_ABI void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition Mangler.cpp:121
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition RegionInfo.h:362
iterator end()
Definition RegionInfo.h:559
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
LLVM_ABI bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
User * user_back()
Definition Value.h:413
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
bool erase(const ValueT &V)
Definition DenseSet.h:100
An opaque object representing a hash code.
Definition Hashing.h:76
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
iterator erase(iterator I)
Remove a node by iterator; never deletes.
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
CallInst * Call
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2116
hash_code hash_value(const FixedPointSemantics &Val)
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition STLExtras.h:2275
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void RemapFunction(Function &F, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the operands, metadata, arguments, and instructions of a function.
auto predecessors(const MachineBasicBlock *BB)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
LLVM_ABI void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Function * OutlinedFunction
The Function for the collective overall function.
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition IROutliner.h:123
CodeExtractor * CE
Used to create an outlined function.
Definition IROutliner.h:120
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition IROutliner.h:130
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition IROutliner.h:133
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition IROutliner.h:93
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition IROutliner.h:153
Function * ExtractedFunction
The function for the extracted region.
Definition IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...