LLVM 20.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
27#include <optional>
28#include <vector>
29
30#define DEBUG_TYPE "iroutliner"
31
32using namespace llvm;
33using namespace IRSimilarity;
34
35// A command flag to be used for debugging to exclude branches from similarity
36// matching and outlining.
37namespace llvm {
39
40// A command flag to be used for debugging to indirect calls from similarity
41// matching and outlining.
43
44// A command flag to be used for debugging to exclude intrinsics from similarity
45// matching and outlining.
47
48} // namespace llvm
49
50// Set to true if the user wants the ir outliner to run on linkonceodr linkage
51// functions. This is false by default because the linker can dedupe linkonceodr
52// functions. Since the outliner is confined to a single module (modulo LTO),
53// this is off by default. It should, however, be the default behavior in
54// LTO.
56 "enable-linkonceodr-ir-outlining", cl::Hidden,
57 cl::desc("Enable the IR outliner on linkonceodr functions"),
58 cl::init(false));
59
60// This is a debug option to test small pieces of code to ensure that outlining
61// works correctly.
63 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
64 cl::desc("Debug option to outline greedily, without restriction that "
65 "calculated benefit outweighs cost"));
66
67/// The OutlinableGroup holds all the overarching information for outlining
68/// a set of regions that are structurally similar to one another, such as the
69/// types of the overall function, the output blocks, the sets of stores needed
70/// and a list of the different regions. This information is used in the
71/// deduplication of extracted regions with the same structure.
73 /// The sections that could be outlined
74 std::vector<OutlinableRegion *> Regions;
75
76 /// The argument types for the function created as the overall function to
77 /// replace the extracted function for each region.
78 std::vector<Type *> ArgumentTypes;
79 /// The FunctionType for the overall function.
81 /// The Function for the collective overall function.
83
84 /// Flag for whether we should not consider this group of OutlinableRegions
85 /// for extraction.
86 bool IgnoreGroup = false;
87
88 /// The return blocks for the overall function.
90
91 /// The PHIBlocks with their corresponding return block based on the return
92 /// value as the key.
94
95 /// A set containing the different GVN store sets needed. Each array contains
96 /// a sorted list of the different values that need to be stored into output
97 /// registers.
99
100 /// Flag for whether the \ref ArgumentTypes have been defined after the
101 /// extraction of the first region.
102 bool InputTypesSet = false;
103
104 /// The number of input values in \ref ArgumentTypes. Anything after this
105 /// index in ArgumentTypes is an output argument.
106 unsigned NumAggregateInputs = 0;
107
108 /// The mapping of the canonical numbering of the values in outlined sections
109 /// to specific arguments.
111
112 /// The number of branches in the region target a basic block that is outside
113 /// of the region.
114 unsigned BranchesToOutside = 0;
115
116 /// Tracker counting backwards from the highest unsigned value possible to
117 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
118 /// -2 and -1 are assigned by the DenseMap.
119 unsigned PHINodeGVNTracker = -3;
120
122 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
125
126 /// The number of instructions that will be outlined by extracting \ref
127 /// Regions.
129 /// The number of added instructions needed for the outlining of the \ref
130 /// Regions.
132
133 /// The argument that needs to be marked with the swifterr attribute. If not
134 /// needed, there is no value.
135 std::optional<unsigned> SwiftErrorArgument;
136
137 /// For the \ref Regions, we look at every Value. If it is a constant,
138 /// we check whether it is the same in Region.
139 ///
140 /// \param [in,out] NotSame contains the global value numbers where the
141 /// constant is not always the same, and must be passed in as an argument.
143
144 /// For the regions, look at each set of GVN stores needed and account for
145 /// each combination. Add an argument to the argument types if there is
146 /// more than one combination.
147 ///
148 /// \param [in] M - The module we are outlining from.
150};
151
152/// Move the contents of \p SourceBB to before the last instruction of \p
153/// TargetBB.
154/// \param SourceBB - the BasicBlock to pull Instructions from.
155/// \param TargetBB - the BasicBlock to put Instruction into.
156static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
157 TargetBB.splice(TargetBB.end(), &SourceBB);
158}
159
160/// A function to sort the keys of \p Map, which must be a mapping of constant
161/// values to basic blocks and return it in \p SortedKeys
162///
163/// \param SortedKeys - The vector the keys will be return in and sorted.
164/// \param Map - The DenseMap containing keys to sort.
165static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
167 for (auto &VtoBB : Map)
168 SortedKeys.push_back(VtoBB.first);
169
170 // Here we expect to have either 1 value that is void (nullptr) or multiple
171 // values that are all constant integers.
172 if (SortedKeys.size() == 1) {
173 assert(!SortedKeys[0] && "Expected a single void value.");
174 return;
175 }
176
177 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
178 assert(LHS && RHS && "Expected non void values.");
179 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
180 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
181
182 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
183 });
184}
185
187 Value *V) {
188 std::optional<unsigned> GVN = Candidate->getGVN(V);
189 assert(GVN && "No GVN for incoming value");
190 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
191 std::optional<unsigned> FirstGVN =
192 Other.Candidate->fromCanonicalNum(*CanonNum);
193 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
194 return FoundValueOpt.value_or(nullptr);
195}
196
199 BasicBlock *BB) {
200 Instruction *FirstNonPHI = BB->getFirstNonPHIOrDbg();
201 assert(FirstNonPHI && "block is empty?");
202 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
203 if (!CorrespondingVal)
204 return nullptr;
205 BasicBlock *CorrespondingBlock =
206 cast<Instruction>(CorrespondingVal)->getParent();
207 return CorrespondingBlock;
208}
209
210/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
211/// in \p Included to branch to BasicBlock \p Replace if they currently branch
212/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
213/// when PHINodes are included in outlined regions.
214///
215/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
216/// checked.
217/// \param Find - The successor block to be replaced.
218/// \param Replace - The new succesor block to branch to.
219/// \param Included - The set of blocks about to be outlined.
221 BasicBlock *Replace,
222 DenseSet<BasicBlock *> &Included) {
223 for (PHINode &PN : PHIBlock->phis()) {
224 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
225 ++Idx) {
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 BasicBlock *Incoming = PN.getIncomingBlock(Idx);
229 if (!Included.contains(Incoming))
230 continue;
231
232 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
233 assert(BI && "Not a branch instruction?");
234 // Look over the branching instructions into this block to see if we
235 // used to branch to Find in this outlined block.
236 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
237 Succ++) {
238 // If we have found the block to replace, we do so here.
239 if (BI->getSuccessor(Succ) != Find)
240 continue;
241 BI->setSuccessor(Succ, Replace);
242 }
243 }
244 }
245}
246
247
249 assert(!CandidateSplit && "Candidate already split!");
250
252
253 Instruction *EndInst = nullptr;
254 // Check whether the last instruction is a terminator, if it is, we do
255 // not split on the following instruction. We leave the block as it is. We
256 // also check that this is not the last instruction in the Module, otherwise
257 // the check for whether the current following instruction matches the
258 // previously recorded instruction will be incorrect.
259 if (!BackInst->isTerminator() ||
260 BackInst->getParent() != &BackInst->getFunction()->back()) {
261 EndInst = Candidate->end()->Inst;
262 assert(EndInst && "Expected an end instruction?");
263 }
264
265 // We check if the current instruction following the last instruction in the
266 // region is the same as the recorded instruction following the last
267 // instruction. If they do not match, there could be problems in rewriting
268 // the program after outlining, so we ignore it.
269 if (!BackInst->isTerminator() &&
270 EndInst != BackInst->getNextNonDebugInstruction())
271 return;
272
273 Instruction *StartInst = (*Candidate->begin()).Inst;
274 assert(StartInst && "Expected a start instruction?");
275 StartBB = StartInst->getParent();
276 PrevBB = StartBB;
277
280
281 // We iterate over the instructions in the region, if we find a PHINode, we
282 // check if there are predecessors outside of the region, if there are,
283 // we ignore this region since we are unable to handle the severing of the
284 // phi node right now.
285
286 // TODO: Handle extraneous inputs for PHINodes through variable number of
287 // inputs, similar to how outputs are handled.
288 BasicBlock::iterator It = StartInst->getIterator();
289 EndBB = BackInst->getParent();
290 BasicBlock *IBlock;
291 BasicBlock *PHIPredBlock = nullptr;
292 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
293 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
294 unsigned NumPredsOutsideRegion = 0;
295 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
296 if (!BBSet.contains(PN->getIncomingBlock(i))) {
297 PHIPredBlock = PN->getIncomingBlock(i);
298 ++NumPredsOutsideRegion;
299 continue;
300 }
301
302 // We must consider the case there the incoming block to the PHINode is
303 // the same as the final block of the OutlinableRegion. If this is the
304 // case, the branch from this block must also be outlined to be valid.
305 IBlock = PN->getIncomingBlock(i);
306 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
307 PHIPredBlock = PN->getIncomingBlock(i);
308 ++NumPredsOutsideRegion;
309 }
310 }
311
312 if (NumPredsOutsideRegion > 1)
313 return;
314
315 It++;
316 }
317
318 // If the region starts with a PHINode, but is not the initial instruction of
319 // the BasicBlock, we ignore this region for now.
320 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
321 return;
322
323 // If the region ends with a PHINode, but does not contain all of the phi node
324 // instructions of the region, we ignore it for now.
325 if (isa<PHINode>(BackInst) &&
326 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
327 return;
328
329 // The basic block gets split like so:
330 // block: block:
331 // inst1 inst1
332 // inst2 inst2
333 // region1 br block_to_outline
334 // region2 block_to_outline:
335 // region3 -> region1
336 // region4 region2
337 // inst3 region3
338 // inst4 region4
339 // br block_after_outline
340 // block_after_outline:
341 // inst3
342 // inst4
343
344 std::string OriginalName = PrevBB->getName().str();
345
346 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
348 // If there was a PHINode with an incoming block outside the region,
349 // make sure is correctly updated in the newly split block.
350 if (PHIPredBlock)
352
353 CandidateSplit = true;
354 if (!BackInst->isTerminator()) {
355 EndBB = EndInst->getParent();
356 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
359 } else {
360 EndBB = BackInst->getParent();
361 EndsInBranch = true;
362 FollowBB = nullptr;
363 }
364
365 // Refind the basic block set.
366 BBSet.clear();
368 // For the phi nodes in the new starting basic block of the region, we
369 // reassign the targets of the basic blocks branching instructions.
371 if (FollowBB)
373}
374
376 assert(CandidateSplit && "Candidate is not split!");
377
378 // The basic block gets reattached like so:
379 // block: block:
380 // inst1 inst1
381 // inst2 inst2
382 // br block_to_outline region1
383 // block_to_outline: -> region2
384 // region1 region3
385 // region2 region4
386 // region3 inst3
387 // region4 inst4
388 // br block_after_outline
389 // block_after_outline:
390 // inst3
391 // inst4
392 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
393
394 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
395 // Make sure PHINode references to the block we are merging into are
396 // updated to be incoming blocks from the predecessor to the current block.
397
398 // NOTE: If this is updated such that the outlined block can have more than
399 // one incoming block to a PHINode, this logic will have to updated
400 // to handle multiple precessors instead.
401
402 // We only need to update this if the outlined section contains a PHINode, if
403 // it does not, then the incoming block was never changed in the first place.
404 // On the other hand, if PrevBB has no predecessors, it means that all
405 // incoming blocks to the first block are contained in the region, and there
406 // will be nothing to update.
407 Instruction *StartInst = (*Candidate->begin()).Inst;
408 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
410 "PrevBB has more than one predecessor. Should be 0 or 1.");
411 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
413 }
415
416 // If we reattaching after outlining, we iterate over the phi nodes to
417 // the initial block, and reassign the branch instructions of the incoming
418 // blocks to the block we are remerging into.
419 if (!ExtractedFunction) {
422
424 if (!EndsInBranch)
426 }
427
429
430 BasicBlock *PlacementBB = PrevBB;
431 if (StartBB != EndBB)
432 PlacementBB = EndBB;
433 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
434 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
435 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
436 PlacementBB->getTerminator()->eraseFromParent();
437 moveBBContents(*FollowBB, *PlacementBB);
438 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
440 }
441
444
445 // Make sure to save changes back to the StartBB.
446 StartBB = PrevBB;
447 EndBB = nullptr;
448 PrevBB = nullptr;
449 FollowBB = nullptr;
450
451 CandidateSplit = false;
452}
453
454/// Find whether \p V matches the Constants previously found for the \p GVN.
455///
456/// \param V - The value to check for consistency.
457/// \param GVN - The global value number assigned to \p V.
458/// \param GVNToConstant - The mapping of global value number to Constants.
459/// \returns true if the Value matches the Constant mapped to by V and false if
460/// it \p V is a Constant but does not match.
461/// \returns std::nullopt if \p V is not a Constant.
462static std::optional<bool>
463constantMatches(Value *V, unsigned GVN,
464 DenseMap<unsigned, Constant *> &GVNToConstant) {
465 // See if we have a constants
466 Constant *CST = dyn_cast<Constant>(V);
467 if (!CST)
468 return std::nullopt;
469
470 // Holds a mapping from a global value number to a Constant.
472 bool Inserted;
473
474
475 // If we have a constant, try to make a new entry in the GVNToConstant.
476 std::tie(GVNToConstantIt, Inserted) =
477 GVNToConstant.insert(std::make_pair(GVN, CST));
478 // If it was found and is not equal, it is not the same. We do not
479 // handle this case yet, and exit early.
480 if (Inserted || (GVNToConstantIt->second == CST))
481 return true;
482
483 return false;
484}
485
487 InstructionCost Benefit = 0;
488
489 // Estimate the benefit of outlining a specific sections of the program. We
490 // delegate mostly this task to the TargetTransformInfo so that if the target
491 // has specific changes, we can have a more accurate estimate.
492
493 // However, getInstructionCost delegates the code size calculation for
494 // arithmetic instructions to getArithmeticInstrCost in
495 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
496 // code size for a division and remainder instruction to be equal to 4, and
497 // everything else to 1. This is not an accurate representation of the
498 // division instruction for targets that have a native division instruction.
499 // To be overly conservative, we only add 1 to the number of instructions for
500 // each division instruction.
501 for (IRInstructionData &ID : *Candidate) {
502 Instruction *I = ID.Inst;
503 switch (I->getOpcode()) {
504 case Instruction::FDiv:
505 case Instruction::FRem:
506 case Instruction::SDiv:
507 case Instruction::SRem:
508 case Instruction::UDiv:
509 case Instruction::URem:
510 Benefit += 1;
511 break;
512 default:
514 break;
515 }
516 }
517
518 return Benefit;
519}
520
521/// Check the \p OutputMappings structure for value \p Input, if it exists
522/// it has been used as an output for outlining, and has been renamed, and we
523/// return the new value, otherwise, we return the same value.
524///
525/// \param OutputMappings [in] - The mapping of values to their renamed value
526/// after being used as an output for an outlined region.
527/// \param Input [in] - The value to find the remapped value of, if it exists.
528/// \return The remapped value if it has been renamed, and the same value if has
529/// not.
531 Value *Input) {
533 OutputMappings.find(Input);
534 if (OutputMapping != OutputMappings.end())
535 return OutputMapping->second;
536 return Input;
537}
538
539/// Find whether \p Region matches the global value numbering to Constant
540/// mapping found so far.
541///
542/// \param Region - The OutlinableRegion we are checking for constants
543/// \param GVNToConstant - The mapping of global value number to Constants.
544/// \param NotSame - The set of global value numbers that do not have the same
545/// constant in each region.
546/// \returns true if all Constants are the same in every use of a Constant in \p
547/// Region and false if not
548static bool
550 DenseMap<unsigned, Constant *> &GVNToConstant,
551 DenseSet<unsigned> &NotSame) {
552 bool ConstantsTheSame = true;
553
554 IRSimilarityCandidate &C = *Region.Candidate;
555 for (IRInstructionData &ID : C) {
556
557 // Iterate over the operands in an instruction. If the global value number,
558 // assigned by the IRSimilarityCandidate, has been seen before, we check if
559 // the number has been found to be not the same value in each instance.
560 for (Value *V : ID.OperVals) {
561 std::optional<unsigned> GVNOpt = C.getGVN(V);
562 assert(GVNOpt && "Expected a GVN for operand?");
563 unsigned GVN = *GVNOpt;
564
565 // Check if this global value has been found to not be the same already.
566 if (NotSame.contains(GVN)) {
567 if (isa<Constant>(V))
568 ConstantsTheSame = false;
569 continue;
570 }
571
572 // If it has been the same so far, we check the value for if the
573 // associated Constant value match the previous instances of the same
574 // global value number. If the global value does not map to a Constant,
575 // it is considered to not be the same value.
576 std::optional<bool> ConstantMatches =
577 constantMatches(V, GVN, GVNToConstant);
578 if (ConstantMatches) {
579 if (*ConstantMatches)
580 continue;
581 else
582 ConstantsTheSame = false;
583 }
584
585 // While this value is a register, it might not have been previously,
586 // make sure we don't already have a constant mapped to this global value
587 // number.
588 if (GVNToConstant.contains(GVN))
589 ConstantsTheSame = false;
590
591 NotSame.insert(GVN);
592 }
593 }
594
595 return ConstantsTheSame;
596}
597
599 DenseMap<unsigned, Constant *> GVNToConstant;
600
601 for (OutlinableRegion *Region : Regions)
602 collectRegionsConstants(*Region, GVNToConstant, NotSame);
603}
604
606 for (OutlinableRegion *OS : Regions)
607 OutputGVNCombinations.insert(OS->GVNStores);
608
609 // We are adding an extracted argument to decide between which output path
610 // to use in the basic block. It is used in a switch statement and only
611 // needs to be an integer.
612 if (OutputGVNCombinations.size() > 1)
613 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
614}
615
616/// Get the subprogram if it exists for one of the outlined regions.
617///
618/// \param [in] Group - The set of regions to find a subprogram for.
619/// \returns the subprogram if it exists, or nullptr.
621 for (OutlinableRegion *OS : Group.Regions)
622 if (Function *F = OS->Call->getFunction())
623 if (DISubprogram *SP = F->getSubprogram())
624 return SP;
625
626 return nullptr;
627}
628
629Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
630 unsigned FunctionNameSuffix) {
631 assert(!Group.OutlinedFunction && "Function is already defined!");
632
633 Type *RetTy = Type::getVoidTy(M.getContext());
634 // All extracted functions _should_ have the same return type at this point
635 // since the similarity identifier ensures that all branches outside of the
636 // region occur in the same place.
637
638 // NOTE: Should we ever move to the model that uses a switch at every point
639 // needed, meaning that we could branch within the region or out, it is
640 // possible that we will need to switch to using the most general case all of
641 // the time.
642 for (OutlinableRegion *R : Group.Regions) {
643 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
644 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
645 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
646 RetTy = ExtractedFuncType;
647 }
648
650 RetTy, Group.ArgumentTypes, false);
651
652 // These functions will only be called from within the same module, so
653 // we can set an internal linkage.
656 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
657
658 // Transfer the swifterr attribute to the correct function parameter.
659 if (Group.SwiftErrorArgument)
661 Attribute::SwiftError);
662
663 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
664 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
665
666 // If there's a DISubprogram associated with this outlined function, then
667 // emit debug info for the outlined function.
668 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
669 Function *F = Group.OutlinedFunction;
670 // We have a DISubprogram. Get its DICompileUnit.
671 DICompileUnit *CU = SP->getUnit();
672 DIBuilder DB(M, true, CU);
673 DIFile *Unit = SP->getFile();
674 Mangler Mg;
675 // Get the mangled name of the function for the linkage name.
676 std::string Dummy;
677 llvm::raw_string_ostream MangledNameStream(Dummy);
678 Mg.getNameWithPrefix(MangledNameStream, F, false);
679
680 DISubprogram *OutlinedSP = DB.createFunction(
681 Unit /* Context */, F->getName(), Dummy,
682 Unit /* File */,
683 0 /* Line 0 is reserved for compiler-generated code. */,
684 DB.createSubroutineType(
685 DB.getOrCreateTypeArray(std::nullopt)), /* void type */
686 0, /* Line 0 is reserved for compiler-generated code. */
687 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
688 /* Outlined code is optimized code by definition. */
689 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
690
691 // Don't add any new variables to the subprogram.
692 DB.finalizeSubprogram(OutlinedSP);
693
694 // Attach subprogram to the function.
695 F->setSubprogram(OutlinedSP);
696 // We're done with the DIBuilder.
697 DB.finalize();
698 }
699
700 return Group.OutlinedFunction;
701}
702
703/// Move each BasicBlock in \p Old to \p New.
704///
705/// \param [in] Old - The function to move the basic blocks from.
706/// \param [in] New - The function to move the basic blocks to.
707/// \param [out] NewEnds - The return blocks of the new overall function.
708static void moveFunctionData(Function &Old, Function &New,
710 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
711 CurrBB.removeFromParent();
712 CurrBB.insertInto(&New);
713 Instruction *I = CurrBB.getTerminator();
714
715 // For each block we find a return instruction is, it is a potential exit
716 // path for the function. We keep track of each block based on the return
717 // value here.
718 if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
719 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
720
721 std::vector<Instruction *> DebugInsts;
722
723 for (Instruction &Val : CurrBB) {
724 // Since debug-info originates from many different locations in the
725 // program, it will cause incorrect reporting from a debugger if we keep
726 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
727 // here, collect intrinsics for removal later.
728 Val.dropDbgRecords();
729
730 // We must handle the scoping of called functions differently than
731 // other outlined instructions.
732 if (!isa<CallInst>(&Val)) {
733 // Remove the debug information for outlined functions.
734 Val.setDebugLoc(DebugLoc());
735
736 // Loop info metadata may contain line locations. Update them to have no
737 // value in the new subprogram since the outlined code could be from
738 // several locations.
739 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
740 if (DISubprogram *SP = New.getSubprogram())
741 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
742 return DILocation::get(New.getContext(), Loc->getLine(),
743 Loc->getColumn(), SP, nullptr);
744 return MD;
745 };
746 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
747 continue;
748 }
749
750 // From this point we are only handling call instructions.
751 CallInst *CI = cast<CallInst>(&Val);
752
753 // Collect debug intrinsics for later removal.
754 if (isa<DbgInfoIntrinsic>(CI)) {
755 DebugInsts.push_back(&Val);
756 continue;
757 }
758
759 // Edit the scope of called functions inside of outlined functions.
760 if (DISubprogram *SP = New.getSubprogram()) {
761 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
762 Val.setDebugLoc(DI);
763 }
764 }
765
766 for (Instruction *I : DebugInsts)
767 I->eraseFromParent();
768 }
769}
770
771/// Find the constants that will need to be lifted into arguments
772/// as they are not the same in each instance of the region.
773///
774/// \param [in] C - The IRSimilarityCandidate containing the region we are
775/// analyzing.
776/// \param [in] NotSame - The set of global value numbers that do not have a
777/// single Constant across all OutlinableRegions similar to \p C.
778/// \param [out] Inputs - The list containing the global value numbers of the
779/// arguments needed for the region of code.
781 std::vector<unsigned> &Inputs) {
783 // Iterate over the instructions, and find what constants will need to be
784 // extracted into arguments.
785 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
786 IDIt != EndIDIt; IDIt++) {
787 for (Value *V : (*IDIt).OperVals) {
788 // Since these are stored before any outlining, they will be in the
789 // global value numbering.
790 unsigned GVN = *C.getGVN(V);
791 if (isa<Constant>(V))
792 if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
793 Inputs.push_back(GVN);
794 Seen.insert(GVN);
795 }
796 }
797 }
798}
799
800/// Find the GVN for the inputs that have been found by the CodeExtractor.
801///
802/// \param [in] C - The IRSimilarityCandidate containing the region we are
803/// analyzing.
804/// \param [in] CurrentInputs - The set of inputs found by the
805/// CodeExtractor.
806/// \param [in] OutputMappings - The mapping of values that have been replaced
807/// by a new output value.
808/// \param [out] EndInputNumbers - The global value numbers for the extracted
809/// arguments.
811 SetVector<Value *> &CurrentInputs,
812 const DenseMap<Value *, Value *> &OutputMappings,
813 std::vector<unsigned> &EndInputNumbers) {
814 // Get the Global Value Number for each input. We check if the Value has been
815 // replaced by a different value at output, and use the original value before
816 // replacement.
817 for (Value *Input : CurrentInputs) {
818 assert(Input && "Have a nullptr as an input");
819 if (OutputMappings.contains(Input))
820 Input = OutputMappings.find(Input)->second;
821 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
822 EndInputNumbers.push_back(*C.getGVN(Input));
823 }
824}
825
826/// Find the original value for the \p ArgInput values if any one of them was
827/// replaced during a previous extraction.
828///
829/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
830/// \param [in] OutputMappings - The mapping of values that have been replaced
831/// by a new output value.
832/// \param [out] RemappedArgInputs - The remapped values according to
833/// \p OutputMappings that will be extracted.
834static void
836 const DenseMap<Value *, Value *> &OutputMappings,
837 SetVector<Value *> &RemappedArgInputs) {
838 // Get the global value number for each input that will be extracted as an
839 // argument by the code extractor, remapping if needed for reloaded values.
840 for (Value *Input : ArgInputs) {
841 if (OutputMappings.contains(Input))
842 Input = OutputMappings.find(Input)->second;
843 RemappedArgInputs.insert(Input);
844 }
845}
846
847/// Find the input GVNs and the output values for a region of Instructions.
848/// Using the code extractor, we collect the inputs to the extracted function.
849///
850/// The \p Region can be identified as needing to be ignored in this function.
851/// It should be checked whether it should be ignored after a call to this
852/// function.
853///
854/// \param [in,out] Region - The region of code to be analyzed.
855/// \param [out] InputGVNs - The global value numbers for the extracted
856/// arguments.
857/// \param [in] NotSame - The global value numbers in the region that do not
858/// have the same constant value in the regions structurally similar to
859/// \p Region.
860/// \param [in] OutputMappings - The mapping of values that have been replaced
861/// by a new output value after extraction.
862/// \param [out] ArgInputs - The values of the inputs to the extracted function.
863/// \param [out] Outputs - The set of values extracted by the CodeExtractor
864/// as outputs.
866 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
867 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
868 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
869 IRSimilarityCandidate &C = *Region.Candidate;
870
871 // OverallInputs are the inputs to the region found by the CodeExtractor,
872 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
873 // allocas of values whose lifetimes are contained completely within the
874 // outlined region. PremappedInputs are the arguments found by the
875 // CodeExtractor, removing conditions such as sunken allocas, but that
876 // may need to be remapped due to the extracted output values replacing
877 // the original values. We use DummyOutputs for this first run of finding
878 // inputs and outputs since the outputs could change during findAllocas,
879 // the correct set of extracted outputs will be in the final Outputs ValueSet.
880 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
881 DummyOutputs;
882
883 // Use the code extractor to get the inputs and outputs, without sunken
884 // allocas or removing llvm.assumes.
885 CodeExtractor *CE = Region.CE;
886 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
887 assert(Region.StartBB && "Region must have a start BasicBlock!");
888 Function *OrigF = Region.StartBB->getParent();
889 CodeExtractorAnalysisCache CEAC(*OrigF);
890 BasicBlock *Dummy = nullptr;
891
892 // The region may be ineligible due to VarArgs in the parent function. In this
893 // case we ignore the region.
894 if (!CE->isEligible()) {
895 Region.IgnoreRegion = true;
896 return;
897 }
898
899 // Find if any values are going to be sunk into the function when extracted
900 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
901 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
902
903 // TODO: Support regions with sunken allocas: values whose lifetimes are
904 // contained completely within the outlined region. These are not guaranteed
905 // to be the same in every region, so we must elevate them all to arguments
906 // when they appear. If these values are not equal, it means there is some
907 // Input in OverallInputs that was removed for ArgInputs.
908 if (OverallInputs.size() != PremappedInputs.size()) {
909 Region.IgnoreRegion = true;
910 return;
911 }
912
913 findConstants(C, NotSame, InputGVNs);
914
915 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
916
917 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
918 ArgInputs);
919
920 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
921 // we need to make sure they are in a deterministic order.
922 stable_sort(InputGVNs);
923}
924
925/// Look over the inputs and map each input argument to an argument in the
926/// overall function for the OutlinableRegions. This creates a way to replace
927/// the arguments of the extracted function with the arguments of the new
928/// overall function.
929///
930/// \param [in,out] Region - The region of code to be analyzed.
931/// \param [in] InputGVNs - The global value numbering of the input values
932/// collected.
933/// \param [in] ArgInputs - The values of the arguments to the extracted
934/// function.
935static void
937 std::vector<unsigned> &InputGVNs,
938 SetVector<Value *> &ArgInputs) {
939
940 IRSimilarityCandidate &C = *Region.Candidate;
941 OutlinableGroup &Group = *Region.Parent;
942
943 // This counts the argument number in the overall function.
944 unsigned TypeIndex = 0;
945
946 // This counts the argument number in the extracted function.
947 unsigned OriginalIndex = 0;
948
949 // Find the mapping of the extracted arguments to the arguments for the
950 // overall function. Since there may be extra arguments in the overall
951 // function to account for the extracted constants, we have two different
952 // counters as we find extracted arguments, and as we come across overall
953 // arguments.
954
955 // Additionally, in our first pass, for the first extracted function,
956 // we find argument locations for the canonical value numbering. This
957 // numbering overrides any discovered location for the extracted code.
958 for (unsigned InputVal : InputGVNs) {
959 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
960 assert(CanonicalNumberOpt && "Canonical number not found?");
961 unsigned CanonicalNumber = *CanonicalNumberOpt;
962
963 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
964 assert(InputOpt && "Global value number not found?");
965 Value *Input = *InputOpt;
966
968 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
969
970 if (!Group.InputTypesSet) {
971 Group.ArgumentTypes.push_back(Input->getType());
972 // If the input value has a swifterr attribute, make sure to mark the
973 // argument in the overall function.
974 if (Input->isSwiftError()) {
975 assert(
976 !Group.SwiftErrorArgument &&
977 "Argument already marked with swifterr for this OutlinableGroup!");
978 Group.SwiftErrorArgument = TypeIndex;
979 }
980 }
981
982 // Check if we have a constant. If we do add it to the overall argument
983 // number to Constant map for the region, and continue to the next input.
984 if (Constant *CST = dyn_cast<Constant>(Input)) {
985 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
986 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
987 else {
989 std::make_pair(CanonicalNumber, TypeIndex));
990 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
991 }
992 TypeIndex++;
993 continue;
994 }
995
996 // It is not a constant, we create the mapping from extracted argument list
997 // to the overall argument list, using the canonical location, if it exists.
998 assert(ArgInputs.count(Input) && "Input cannot be found!");
999
1000 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
1001 if (OriginalIndex != AggArgIt->second)
1002 Region.ChangedArgOrder = true;
1003 Region.ExtractedArgToAgg.insert(
1004 std::make_pair(OriginalIndex, AggArgIt->second));
1005 Region.AggArgToExtracted.insert(
1006 std::make_pair(AggArgIt->second, OriginalIndex));
1007 } else {
1009 std::make_pair(CanonicalNumber, TypeIndex));
1010 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
1011 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
1012 }
1013 OriginalIndex++;
1014 TypeIndex++;
1015 }
1016
1017 // If the function type definitions for the OutlinableGroup holding the region
1018 // have not been set, set the length of the inputs here. We should have the
1019 // same inputs for all of the different regions contained in the
1020 // OutlinableGroup since they are all structurally similar to one another.
1021 if (!Group.InputTypesSet) {
1022 Group.NumAggregateInputs = TypeIndex;
1023 Group.InputTypesSet = true;
1024 }
1025
1026 Region.NumExtractedInputs = OriginalIndex;
1027}
1028
1029/// Check if the \p V has any uses outside of the region other than \p PN.
1030///
1031/// \param V [in] - The value to check.
1032/// \param PHILoc [in] - The location in the PHINode of \p V.
1033/// \param PN [in] - The PHINode using \p V.
1034/// \param Exits [in] - The potential blocks we exit to from the outlined
1035/// region.
1036/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1037/// \returns true if \p V has any use soutside its region other than \p PN.
1038static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1040 DenseSet<BasicBlock *> &BlocksInRegion) {
1041 // We check to see if the value is used by the PHINode from some other
1042 // predecessor not included in the region. If it is, we make sure
1043 // to keep it as an output.
1044 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1045 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1046 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1047 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1048 }))
1049 return true;
1050
1051 // Check if the value is used by any other instructions outside the region.
1052 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1053 Instruction *I = dyn_cast<Instruction>(U);
1054 if (!I)
1055 return false;
1056
1057 // If the use of the item is inside the region, we skip it. Uses
1058 // inside the region give us useful information about how the item could be
1059 // used as an output.
1060 BasicBlock *Parent = I->getParent();
1061 if (BlocksInRegion.contains(Parent))
1062 return false;
1063
1064 // If it's not a PHINode then we definitely know the use matters. This
1065 // output value will not completely combined with another item in a PHINode
1066 // as it is directly reference by another non-phi instruction
1067 if (!isa<PHINode>(I))
1068 return true;
1069
1070 // If we have a PHINode outside one of the exit locations, then it
1071 // can be considered an outside use as well. If there is a PHINode
1072 // contained in the Exit where this values use matters, it will be
1073 // caught when we analyze that PHINode.
1074 if (!Exits.contains(Parent))
1075 return true;
1076
1077 return false;
1078 });
1079}
1080
1081/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1082/// considered outputs. A PHINodes is an output when more than one incoming
1083/// value has been marked by the CodeExtractor as an output.
1084///
1085/// \param CurrentExitFromRegion [in] - The block to analyze.
1086/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1087/// region.
1088/// \param RegionBlocks [in] - The basic blocks in the region.
1089/// \param Outputs [in, out] - The existing outputs for the region, we may add
1090/// PHINodes to this as we find that they replace output values.
1091/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1092/// totally replaced by a PHINode.
1093/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1094/// in PHINodes, but have other uses, and should still be considered outputs.
1096 BasicBlock *CurrentExitFromRegion,
1097 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1098 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1099 DenseSet<Value *> &OutputsReplacedByPHINode,
1100 DenseSet<Value *> &OutputsWithNonPhiUses) {
1101 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1102 // Find all incoming values from the outlining region.
1103 SmallVector<unsigned, 2> IncomingVals;
1104 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1105 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1106 IncomingVals.push_back(I);
1107
1108 // Do not process PHI if there are no predecessors from region.
1109 unsigned NumIncomingVals = IncomingVals.size();
1110 if (NumIncomingVals == 0)
1111 continue;
1112
1113 // If there is one predecessor, we mark it as a value that needs to be kept
1114 // as an output.
1115 if (NumIncomingVals == 1) {
1116 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1117 OutputsWithNonPhiUses.insert(V);
1118 OutputsReplacedByPHINode.erase(V);
1119 continue;
1120 }
1121
1122 // This PHINode will be used as an output value, so we add it to our list.
1123 Outputs.insert(&PN);
1124
1125 // Not all of the incoming values should be ignored as other inputs and
1126 // outputs may have uses in outlined region. If they have other uses
1127 // outside of the single PHINode we should not skip over it.
1128 for (unsigned Idx : IncomingVals) {
1129 Value *V = PN.getIncomingValue(Idx);
1130 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1131 OutputsWithNonPhiUses.insert(V);
1132 OutputsReplacedByPHINode.erase(V);
1133 continue;
1134 }
1135 if (!OutputsWithNonPhiUses.contains(V))
1136 OutputsReplacedByPHINode.insert(V);
1137 }
1138 }
1139}
1140
1141// Represents the type for the unsigned number denoting the output number for
1142// phi node, along with the canonical number for the exit block.
1143using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1144// The list of canonical numbers for the incoming values to a PHINode.
1146// The pair type representing the set of canonical values being combined in the
1147// PHINode, along with the location data for the PHINode.
1148using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1149
1150/// Encode \p PND as an integer for easy lookup based on the argument location,
1151/// the parent BasicBlock canonical numbering, and the canonical numbering of
1152/// the values stored in the PHINode.
1153///
1154/// \param PND - The data to hash.
1155/// \returns The hash code of \p PND.
1157 return llvm::hash_combine(
1158 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1159 llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1160}
1161
1162/// Create a special GVN for PHINodes that will be used outside of
1163/// the region. We create a hash code based on the Canonical number of the
1164/// parent BasicBlock, the canonical numbering of the values stored in the
1165/// PHINode and the aggregate argument location. This is used to find whether
1166/// this PHINode type has been given a canonical numbering already. If not, we
1167/// assign it a value and store it for later use. The value is returned to
1168/// identify different output schemes for the set of regions.
1169///
1170/// \param Region - The region that \p PN is an output for.
1171/// \param PN - The PHINode we are analyzing.
1172/// \param Blocks - The blocks for the region we are analyzing.
1173/// \param AggArgIdx - The argument \p PN will be stored into.
1174/// \returns An optional holding the assigned canonical number, or std::nullopt
1175/// if there is some attribute of the PHINode blocking it from being used.
1176static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1177 PHINode *PN,
1179 unsigned AggArgIdx) {
1180 OutlinableGroup &Group = *Region.Parent;
1181 IRSimilarityCandidate &Cand = *Region.Candidate;
1182 BasicBlock *PHIBB = PN->getParent();
1183 CanonList PHIGVNs;
1184 Value *Incoming;
1185 BasicBlock *IncomingBlock;
1186 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1188 IncomingBlock = PN->getIncomingBlock(Idx);
1189 // If we cannot find a GVN, and the incoming block is included in the region
1190 // this means that the input to the PHINode is not included in the region we
1191 // are trying to analyze, meaning, that if it was outlined, we would be
1192 // adding an extra input. We ignore this case for now, and so ignore the
1193 // region.
1194 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1195 if (!OGVN && Blocks.contains(IncomingBlock)) {
1196 Region.IgnoreRegion = true;
1197 return std::nullopt;
1198 }
1199
1200 // If the incoming block isn't in the region, we don't have to worry about
1201 // this incoming value.
1202 if (!Blocks.contains(IncomingBlock))
1203 continue;
1204
1205 // Collect the canonical numbers of the values in the PHINode.
1206 unsigned GVN = *OGVN;
1207 OGVN = Cand.getCanonicalNum(GVN);
1208 assert(OGVN && "No GVN found for incoming value?");
1209 PHIGVNs.push_back(*OGVN);
1210
1211 // Find the incoming block and use the canonical numbering as well to define
1212 // the hash for the PHINode.
1213 OGVN = Cand.getGVN(IncomingBlock);
1214
1215 // If there is no number for the incoming block, it is because we have
1216 // split the candidate basic blocks. So we use the previous block that it
1217 // was split from to find the valid global value numbering for the PHINode.
1218 if (!OGVN) {
1219 assert(Cand.getStartBB() == IncomingBlock &&
1220 "Unknown basic block used in exit path PHINode.");
1221
1222 BasicBlock *PrevBlock = nullptr;
1223 // Iterate over the predecessors to the incoming block of the
1224 // PHINode, when we find a block that is not contained in the region
1225 // we know that this is the first block that we split from, and should
1226 // have a valid global value numbering.
1227 for (BasicBlock *Pred : predecessors(IncomingBlock))
1228 if (!Blocks.contains(Pred)) {
1229 PrevBlock = Pred;
1230 break;
1231 }
1232 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1233 OGVN = Cand.getGVN(PrevBlock);
1234 }
1235 GVN = *OGVN;
1236 OGVN = Cand.getCanonicalNum(GVN);
1237 assert(OGVN && "No GVN found for incoming block?");
1238 PHIGVNs.push_back(*OGVN);
1239 }
1240
1241 // Now that we have the GVNs for the incoming values, we are going to combine
1242 // them with the GVN of the incoming bock, and the output location of the
1243 // PHINode to generate a hash value representing this instance of the PHINode.
1246 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1247 assert(BBGVN && "Could not find GVN for the incoming block!");
1248
1249 BBGVN = Cand.getCanonicalNum(*BBGVN);
1250 assert(BBGVN && "Could not find canonical number for the incoming block!");
1251 // Create a pair of the exit block canonical value, and the aggregate
1252 // argument location, connected to the canonical numbers stored in the
1253 // PHINode.
1254 PHINodeData TemporaryPair =
1255 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1256 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1257
1258 // Look for and create a new entry in our connection between canonical
1259 // numbers for PHINodes, and the set of objects we just created.
1260 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1261 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1262 bool Inserted = false;
1263 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1264 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1265 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1266 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1267 }
1268
1269 return GVNToPHIIt->second;
1270}
1271
1272/// Create a mapping of the output arguments for the \p Region to the output
1273/// arguments of the overall outlined function.
1274///
1275/// \param [in,out] Region - The region of code to be analyzed.
1276/// \param [in] Outputs - The values found by the code extractor.
1277static void
1279 SetVector<Value *> &Outputs) {
1280 OutlinableGroup &Group = *Region.Parent;
1281 IRSimilarityCandidate &C = *Region.Candidate;
1282
1284 DenseSet<BasicBlock *> BlocksInRegion;
1285 C.getBasicBlocks(BlocksInRegion, BE);
1286
1287 // Find the exits to the region.
1289 for (BasicBlock *Block : BE)
1290 for (BasicBlock *Succ : successors(Block))
1291 if (!BlocksInRegion.contains(Succ))
1292 Exits.insert(Succ);
1293
1294 // After determining which blocks exit to PHINodes, we add these PHINodes to
1295 // the set of outputs to be processed. We also check the incoming values of
1296 // the PHINodes for whether they should no longer be considered outputs.
1297 DenseSet<Value *> OutputsReplacedByPHINode;
1298 DenseSet<Value *> OutputsWithNonPhiUses;
1299 for (BasicBlock *ExitBB : Exits)
1300 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1301 OutputsReplacedByPHINode,
1302 OutputsWithNonPhiUses);
1303
1304 // This counts the argument number in the extracted function.
1305 unsigned OriginalIndex = Region.NumExtractedInputs;
1306
1307 // This counts the argument number in the overall function.
1308 unsigned TypeIndex = Group.NumAggregateInputs;
1309 bool TypeFound;
1310 DenseSet<unsigned> AggArgsUsed;
1311
1312 // Iterate over the output types and identify if there is an aggregate pointer
1313 // type whose base type matches the current output type. If there is, we mark
1314 // that we will use this output register for this value. If not we add another
1315 // type to the overall argument type list. We also store the GVNs used for
1316 // stores to identify which values will need to be moved into an special
1317 // block that holds the stores to the output registers.
1318 for (Value *Output : Outputs) {
1319 TypeFound = false;
1320 // We can do this since it is a result value, and will have a number
1321 // that is necessarily the same. BUT if in the future, the instructions
1322 // do not have to be in same order, but are functionally the same, we will
1323 // have to use a different scheme, as one-to-one correspondence is not
1324 // guaranteed.
1325 unsigned ArgumentSize = Group.ArgumentTypes.size();
1326
1327 // If the output is combined in a PHINode, we make sure to skip over it.
1328 if (OutputsReplacedByPHINode.contains(Output))
1329 continue;
1330
1331 unsigned AggArgIdx = 0;
1332 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1333 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1334 continue;
1335
1336 if (AggArgsUsed.contains(Jdx))
1337 continue;
1338
1339 TypeFound = true;
1340 AggArgsUsed.insert(Jdx);
1341 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1342 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1343 AggArgIdx = Jdx;
1344 break;
1345 }
1346
1347 // We were unable to find an unused type in the output type set that matches
1348 // the output, so we add a pointer type to the argument types of the overall
1349 // function to handle this output and create a mapping to it.
1350 if (!TypeFound) {
1351 Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
1352 M.getDataLayout().getAllocaAddrSpace()));
1353 // Mark the new pointer type as the last value in the aggregate argument
1354 // list.
1355 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1356 AggArgsUsed.insert(ArgTypeIdx);
1357 Region.ExtractedArgToAgg.insert(
1358 std::make_pair(OriginalIndex, ArgTypeIdx));
1359 Region.AggArgToExtracted.insert(
1360 std::make_pair(ArgTypeIdx, OriginalIndex));
1361 AggArgIdx = ArgTypeIdx;
1362 }
1363
1364 // TODO: Adapt to the extra input from the PHINode.
1365 PHINode *PN = dyn_cast<PHINode>(Output);
1366
1367 std::optional<unsigned> GVN;
1368 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1369 // Values outside the region can be combined into PHINode when we
1370 // have multiple exits. We collect both of these into a list to identify
1371 // which values are being used in the PHINode. Each list identifies a
1372 // different PHINode, and a different output. We store the PHINode as it's
1373 // own canonical value. These canonical values are also dependent on the
1374 // output argument it is saved to.
1375
1376 // If two PHINodes have the same canonical values, but different aggregate
1377 // argument locations, then they will have distinct Canonical Values.
1378 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1379 if (!GVN)
1380 return;
1381 } else {
1382 // If we do not have a PHINode we use the global value numbering for the
1383 // output value, to find the canonical number to add to the set of stored
1384 // values.
1385 GVN = C.getGVN(Output);
1386 GVN = C.getCanonicalNum(*GVN);
1387 }
1388
1389 // Each region has a potentially unique set of outputs. We save which
1390 // values are output in a list of canonical values so we can differentiate
1391 // among the different store schemes.
1392 Region.GVNStores.push_back(*GVN);
1393
1394 OriginalIndex++;
1395 TypeIndex++;
1396 }
1397
1398 // We sort the stored values to make sure that we are not affected by analysis
1399 // order when determining what combination of items were stored.
1400 stable_sort(Region.GVNStores);
1401}
1402
1403void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1404 DenseSet<unsigned> &NotSame) {
1405 std::vector<unsigned> Inputs;
1406 SetVector<Value *> ArgInputs, Outputs;
1407
1408 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1409 Outputs);
1410
1411 if (Region.IgnoreRegion)
1412 return;
1413
1414 // Map the inputs found by the CodeExtractor to the arguments found for
1415 // the overall function.
1417
1418 // Map the outputs found by the CodeExtractor to the arguments found for
1419 // the overall function.
1421}
1422
1423/// Replace the extracted function in the Region with a call to the overall
1424/// function constructed from the deduplicated similar regions, replacing and
1425/// remapping the values passed to the extracted function as arguments to the
1426/// new arguments of the overall function.
1427///
1428/// \param [in] M - The module to outline from.
1429/// \param [in] Region - The regions of extracted code to be replaced with a new
1430/// function.
1431/// \returns a call instruction with the replaced function.
1433 std::vector<Value *> NewCallArgs;
1435
1436 OutlinableGroup &Group = *Region.Parent;
1437 CallInst *Call = Region.Call;
1438 assert(Call && "Call to replace is nullptr?");
1439 Function *AggFunc = Group.OutlinedFunction;
1440 assert(AggFunc && "Function to replace with is nullptr?");
1441
1442 // If the arguments are the same size, there are not values that need to be
1443 // made into an argument, the argument ordering has not been change, or
1444 // different output registers to handle. We can simply replace the called
1445 // function in this case.
1446 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1447 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1448 << *AggFunc << " with same number of arguments\n");
1449 Call->setCalledFunction(AggFunc);
1450 return Call;
1451 }
1452
1453 // We have a different number of arguments than the new function, so
1454 // we need to use our previously mappings off extracted argument to overall
1455 // function argument, and constants to overall function argument to create the
1456 // new argument list.
1457 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1458
1459 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1460 Group.OutputGVNCombinations.size() > 1) {
1461 // If we are on the last argument, and we need to differentiate between
1462 // output blocks, add an integer to the argument list to determine
1463 // what block to take
1464 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1465 << Region.OutputBlockNum << "\n");
1466 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1467 Region.OutputBlockNum));
1468 continue;
1469 }
1470
1471 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1472 if (ArgPair != Region.AggArgToExtracted.end()) {
1473 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1474 // If we found the mapping from the extracted function to the overall
1475 // function, we simply add it to the argument list. We use the same
1476 // value, it just needs to honor the new order of arguments.
1477 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1478 << *ArgumentValue << "\n");
1479 NewCallArgs.push_back(ArgumentValue);
1480 continue;
1481 }
1482
1483 // If it is a constant, we simply add it to the argument list as a value.
1484 if (Region.AggArgToConstant.contains(AggArgIdx)) {
1485 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1486 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1487 << *CST << "\n");
1488 NewCallArgs.push_back(CST);
1489 continue;
1490 }
1491
1492 // Add a nullptr value if the argument is not found in the extracted
1493 // function. If we cannot find a value, it means it is not in use
1494 // for the region, so we should not pass anything to it.
1495 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1496 NewCallArgs.push_back(ConstantPointerNull::get(
1497 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1498 }
1499
1500 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1501 << *AggFunc << " with new set of arguments\n");
1502 // Create the new call instruction and erase the old one.
1503 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1504 Call->getIterator());
1505
1506 // It is possible that the call to the outlined function is either the first
1507 // instruction is in the new block, the last instruction, or both. If either
1508 // of these is the case, we need to make sure that we replace the instruction
1509 // in the IRInstructionData struct with the new call.
1510 CallInst *OldCall = Region.Call;
1511 if (Region.NewFront->Inst == OldCall)
1512 Region.NewFront->Inst = Call;
1513 if (Region.NewBack->Inst == OldCall)
1514 Region.NewBack->Inst = Call;
1515
1516 // Transfer any debug information.
1517 Call->setDebugLoc(Region.Call->getDebugLoc());
1518 // Since our output may determine which branch we go to, we make sure to
1519 // propogate this new call value through the module.
1520 OldCall->replaceAllUsesWith(Call);
1521
1522 // Remove the old instruction.
1523 OldCall->eraseFromParent();
1524 Region.Call = Call;
1525
1526 // Make sure that the argument in the new function has the SwiftError
1527 // argument.
1528 if (Group.SwiftErrorArgument)
1529 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1530
1531 return Call;
1532}
1533
1534/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1535/// for \p RetVal.
1536///
1537/// \param Group - The OutlinableGroup containing the information about the
1538/// overall outlined function.
1539/// \param RetVal - The return value or exit option that we are currently
1540/// evaluating.
1541/// \returns The found or newly created BasicBlock to contain the needed
1542/// PHINodes to be used as outputs.
1545 ReturnBlockForRetVal;
1546 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1547 ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1548 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1549 "Could not find output value!");
1550 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1551
1552 // Find if a PHIBlock exists for this return value already. If it is
1553 // the first time we are analyzing this, we will not, so we record it.
1554 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1555 if (PhiBlockForRetVal != Group.PHIBlocks.end())
1556 return PhiBlockForRetVal->second;
1557
1558 // If we did not find a block, we create one, and insert it into the
1559 // overall function and record it.
1560 bool Inserted = false;
1561 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1562 ReturnBB->getParent());
1563 std::tie(PhiBlockForRetVal, Inserted) =
1564 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1565
1566 // We find the predecessors of the return block in the newly created outlined
1567 // function in order to point them to the new PHIBlock rather than the already
1568 // existing return block.
1569 SmallVector<BranchInst *, 2> BranchesToChange;
1570 for (BasicBlock *Pred : predecessors(ReturnBB))
1571 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1572
1573 // Now we mark the branch instructions found, and change the references of the
1574 // return block to the newly created PHIBlock.
1575 for (BranchInst *BI : BranchesToChange)
1576 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1577 if (BI->getSuccessor(Succ) != ReturnBB)
1578 continue;
1579 BI->setSuccessor(Succ, PHIBlock);
1580 }
1581
1582 BranchInst::Create(ReturnBB, PHIBlock);
1583
1584 return PhiBlockForRetVal->second;
1585}
1586
1587/// For the function call now representing the \p Region, find the passed value
1588/// to that call that represents Argument \p A at the call location if the
1589/// call has already been replaced with a call to the overall, aggregate
1590/// function.
1591///
1592/// \param A - The Argument to get the passed value for.
1593/// \param Region - The extracted Region corresponding to the outlined function.
1594/// \returns The Value representing \p A at the call site.
1595static Value *
1597 const OutlinableRegion &Region) {
1598 // If we don't need to adjust the argument number at all (since the call
1599 // has already been replaced by a call to the overall outlined function)
1600 // we can just get the specified argument.
1601 return Region.Call->getArgOperand(A->getArgNo());
1602}
1603
1604/// For the function call now representing the \p Region, find the passed value
1605/// to that call that represents Argument \p A at the call location if the
1606/// call has only been replaced by the call to the aggregate function.
1607///
1608/// \param A - The Argument to get the passed value for.
1609/// \param Region - The extracted Region corresponding to the outlined function.
1610/// \returns The Value representing \p A at the call site.
1611static Value *
1613 const OutlinableRegion &Region) {
1614 unsigned ArgNum = A->getArgNo();
1615
1616 // If it is a constant, we can look at our mapping from when we created
1617 // the outputs to figure out what the constant value is.
1618 if (Region.AggArgToConstant.count(ArgNum))
1619 return Region.AggArgToConstant.find(ArgNum)->second;
1620
1621 // If it is not a constant, and we are not looking at the overall function, we
1622 // need to adjust which argument we are looking at.
1623 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1624 return Region.Call->getArgOperand(ArgNum);
1625}
1626
1627/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1628///
1629/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1630/// \param Region [in] - The OutlinableRegion containing \p PN.
1631/// \param OutputMappings [in] - The mapping of output values from outlined
1632/// region to their original values.
1633/// \param CanonNums [out] - The canonical numbering for the incoming values to
1634/// \p PN paired with their incoming block.
1635/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1636/// of \p Region rather than the overall function's call.
1639 const DenseMap<Value *, Value *> &OutputMappings,
1640 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1641 bool ReplacedWithOutlinedCall = true) {
1642 // Iterate over the incoming values.
1643 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1644 Value *IVal = PN->getIncomingValue(Idx);
1645 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1646 // If we have an argument as incoming value, we need to grab the passed
1647 // value from the call itself.
1648 if (Argument *A = dyn_cast<Argument>(IVal)) {
1649 if (ReplacedWithOutlinedCall)
1651 else
1653 }
1654
1655 // Get the original value if it has been replaced by an output value.
1656 IVal = findOutputMapping(OutputMappings, IVal);
1657
1658 // Find and add the canonical number for the incoming value.
1659 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1660 assert(GVN && "No GVN for incoming value");
1661 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1662 assert(CanonNum && "No Canonical Number for GVN");
1663 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1664 }
1665}
1666
1667/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1668/// in order to condense the number of instructions added to the outlined
1669/// function.
1670///
1671/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1672/// \param Region [in] - The OutlinableRegion containing \p PN.
1673/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1674/// \p PN in.
1675/// \param OutputMappings [in] - The mapping of output values from outlined
1676/// region to their original values.
1677/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1678/// matched.
1679/// \return the newly found or created PHINode in \p OverallPhiBlock.
1680static PHINode*
1682 BasicBlock *OverallPhiBlock,
1683 const DenseMap<Value *, Value *> &OutputMappings,
1684 DenseSet<PHINode *> &UsedPHIs) {
1685 OutlinableGroup &Group = *Region.Parent;
1686
1687
1688 // A list of the canonical numbering assigned to each incoming value, paired
1689 // with the incoming block for the PHINode passed into this function.
1691
1692 // We have to use the extracted function since we have merged this region into
1693 // the overall function yet. We make sure to reassign the argument numbering
1694 // since it is possible that the argument ordering is different between the
1695 // functions.
1696 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1697 /* ReplacedWithOutlinedCall = */ false);
1698
1699 OutlinableRegion *FirstRegion = Group.Regions[0];
1700
1701 // A list of the canonical numbering assigned to each incoming value, paired
1702 // with the incoming block for the PHINode that we are currently comparing
1703 // the passed PHINode to.
1705
1706 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1707 // the uses of the PHINode we are searching for, with the found PHINode.
1708 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1709 // If this PHINode has already been matched to another PHINode to be merged,
1710 // we skip it.
1711 if (UsedPHIs.contains(&CurrPN))
1712 continue;
1713
1714 CurrentCanonNums.clear();
1715 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1716 /* ReplacedWithOutlinedCall = */ true);
1717
1718 // If the list of incoming values is not the same length, then they cannot
1719 // match since there is not an analogue for each incoming value.
1720 if (PNCanonNums.size() != CurrentCanonNums.size())
1721 continue;
1722
1723 bool FoundMatch = true;
1724
1725 // We compare the canonical value for each incoming value in the passed
1726 // in PHINode to one already present in the outlined region. If the
1727 // incoming values do not match, then the PHINodes do not match.
1728
1729 // We also check to make sure that the incoming block matches as well by
1730 // finding the corresponding incoming block in the combined outlined region
1731 // for the current outlined region.
1732 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1733 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1734 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1735 if (ToCompareTo.first != ToAdd.first) {
1736 FoundMatch = false;
1737 break;
1738 }
1739
1740 BasicBlock *CorrespondingBlock =
1741 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1742 assert(CorrespondingBlock && "Found block is nullptr");
1743 if (CorrespondingBlock != ToCompareTo.second) {
1744 FoundMatch = false;
1745 break;
1746 }
1747 }
1748
1749 // If all incoming values and branches matched, then we can merge
1750 // into the found PHINode.
1751 if (FoundMatch) {
1752 UsedPHIs.insert(&CurrPN);
1753 return &CurrPN;
1754 }
1755 }
1756
1757 // If we've made it here, it means we weren't able to replace the PHINode, so
1758 // we must insert it ourselves.
1759 PHINode *NewPN = cast<PHINode>(PN.clone());
1760 NewPN->insertBefore(&*OverallPhiBlock->begin());
1761 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1762 Idx++) {
1763 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1764 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1765
1766 // Find corresponding basic block in the overall function for the incoming
1767 // block.
1768 BasicBlock *BlockToUse =
1769 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1770 NewPN->setIncomingBlock(Idx, BlockToUse);
1771
1772 // If we have an argument we make sure we replace using the argument from
1773 // the correct function.
1774 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1775 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1776 NewPN->setIncomingValue(Idx, Val);
1777 continue;
1778 }
1779
1780 // Find the corresponding value in the overall function.
1781 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1782 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1783 assert(Val && "Value is nullptr?");
1785 FirstRegion->RemappedArguments.find(Val);
1786 if (RemappedIt != FirstRegion->RemappedArguments.end())
1787 Val = RemappedIt->second;
1788 NewPN->setIncomingValue(Idx, Val);
1789 }
1790 return NewPN;
1791}
1792
1793// Within an extracted function, replace the argument uses of the extracted
1794// region with the arguments of the function for an OutlinableGroup.
1795//
1796/// \param [in] Region - The region of extracted code to be changed.
1797/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1798/// region.
1799/// \param [in] FirstFunction - A flag to indicate whether we are using this
1800/// function to define the overall outlined function for all the regions, or
1801/// if we are operating on one of the following regions.
1802static void
1805 const DenseMap<Value *, Value *> &OutputMappings,
1806 bool FirstFunction = false) {
1807 OutlinableGroup &Group = *Region.Parent;
1808 assert(Region.ExtractedFunction && "Region has no extracted function?");
1809
1810 Function *DominatingFunction = Region.ExtractedFunction;
1811 if (FirstFunction)
1812 DominatingFunction = Group.OutlinedFunction;
1813 DominatorTree DT(*DominatingFunction);
1814 DenseSet<PHINode *> UsedPHIs;
1815
1816 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1817 ArgIdx++) {
1818 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1819 "No mapping from extracted to outlined?");
1820 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1821 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1822 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1823 // The argument is an input, so we can simply replace it with the overall
1824 // argument value
1825 if (ArgIdx < Region.NumExtractedInputs) {
1826 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1827 << *Region.ExtractedFunction << " with " << *AggArg
1828 << " in function " << *Group.OutlinedFunction << "\n");
1829 Arg->replaceAllUsesWith(AggArg);
1830 Value *V = Region.Call->getArgOperand(ArgIdx);
1831 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1832 continue;
1833 }
1834
1835 // If we are replacing an output, we place the store value in its own
1836 // block inside the overall function before replacing the use of the output
1837 // in the function.
1838 assert(Arg->hasOneUse() && "Output argument can only have one use");
1839 User *InstAsUser = Arg->user_back();
1840 assert(InstAsUser && "User is nullptr!");
1841
1842 Instruction *I = cast<Instruction>(InstAsUser);
1843 BasicBlock *BB = I->getParent();
1844 SmallVector<BasicBlock *, 4> Descendants;
1845 DT.getDescendants(BB, Descendants);
1846 bool EdgeAdded = false;
1847 if (Descendants.size() == 0) {
1848 EdgeAdded = true;
1849 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1850 DT.getDescendants(BB, Descendants);
1851 }
1852
1853 // Iterate over the following blocks, looking for return instructions,
1854 // if we find one, find the corresponding output block for the return value
1855 // and move our store instruction there.
1856 for (BasicBlock *DescendBB : Descendants) {
1857 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1858 if (!RI)
1859 continue;
1860 Value *RetVal = RI->getReturnValue();
1861 auto VBBIt = OutputBBs.find(RetVal);
1862 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1863
1864 // If this is storing a PHINode, we must make sure it is included in the
1865 // overall function.
1866 StoreInst *SI = cast<StoreInst>(I);
1867
1868 Value *ValueOperand = SI->getValueOperand();
1869
1870 StoreInst *NewI = cast<StoreInst>(I->clone());
1871 NewI->setDebugLoc(DebugLoc());
1872 BasicBlock *OutputBB = VBBIt->second;
1873 NewI->insertInto(OutputBB, OutputBB->end());
1874 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1875 << *OutputBB << "\n");
1876
1877 // If this is storing a PHINode, we must make sure it is included in the
1878 // overall function.
1879 if (!isa<PHINode>(ValueOperand) ||
1880 Region.Candidate->getGVN(ValueOperand).has_value()) {
1881 if (FirstFunction)
1882 continue;
1883 Value *CorrVal =
1884 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1885 assert(CorrVal && "Value is nullptr?");
1886 NewI->setOperand(0, CorrVal);
1887 continue;
1888 }
1889 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1890 // If it has a value, it was not split by the code extractor, which
1891 // is what we are looking for.
1892 if (Region.Candidate->getGVN(PN))
1893 continue;
1894
1895 // We record the parent block for the PHINode in the Region so that
1896 // we can exclude it from checks later on.
1897 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1898
1899 // If this is the first function, we do not need to worry about mergiing
1900 // this with any other block in the overall outlined function, so we can
1901 // just continue.
1902 if (FirstFunction) {
1903 BasicBlock *PHIBlock = PN->getParent();
1904 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1905 continue;
1906 }
1907
1908 // We look for the aggregate block that contains the PHINodes leading into
1909 // this exit path. If we can't find one, we create one.
1910 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1911
1912 // For our PHINode, we find the combined canonical numbering, and
1913 // attempt to find a matching PHINode in the overall PHIBlock. If we
1914 // cannot, we copy the PHINode and move it into this new block.
1915 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1916 OutputMappings, UsedPHIs);
1917 NewI->setOperand(0, NewPN);
1918 }
1919
1920 // If we added an edge for basic blocks without a predecessor, we remove it
1921 // here.
1922 if (EdgeAdded)
1923 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1924 I->eraseFromParent();
1925
1926 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1927 << *Region.ExtractedFunction << " with " << *AggArg
1928 << " in function " << *Group.OutlinedFunction << "\n");
1929 Arg->replaceAllUsesWith(AggArg);
1930 }
1931}
1932
1933/// Within an extracted function, replace the constants that need to be lifted
1934/// into arguments with the actual argument.
1935///
1936/// \param Region [in] - The region of extracted code to be changed.
1938 OutlinableGroup &Group = *Region.Parent;
1939 // Iterate over the constants that need to be elevated into arguments
1940 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1941 unsigned AggArgIdx = Const.first;
1942 Function *OutlinedFunction = Group.OutlinedFunction;
1943 assert(OutlinedFunction && "Overall Function is not defined?");
1944 Constant *CST = Const.second;
1945 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1946 // Identify the argument it will be elevated to, and replace instances of
1947 // that constant in the function.
1948
1949 // TODO: If in the future constants do not have one global value number,
1950 // i.e. a constant 1 could be mapped to several values, this check will
1951 // have to be more strict. It cannot be using only replaceUsesWithIf.
1952
1953 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1954 << " in function " << *OutlinedFunction << " with "
1955 << *Arg << "\n");
1956 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1957 if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1958 return I->getFunction() == OutlinedFunction;
1959 return false;
1960 });
1961 }
1962}
1963
1964/// It is possible that there is a basic block that already performs the same
1965/// stores. This returns a duplicate block, if it exists
1966///
1967/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1968/// \param OutputStoreBBs [in] The existing output blocks.
1969/// \returns an optional value with the number output block if there is a match.
1970std::optional<unsigned> findDuplicateOutputBlock(
1972 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1973
1974 bool Mismatch = false;
1975 unsigned MatchingNum = 0;
1976 // We compare the new set output blocks to the other sets of output blocks.
1977 // If they are the same number, and have identical instructions, they are
1978 // considered to be the same.
1979 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1980 Mismatch = false;
1981 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1983 OutputBBs.find(VToB.first);
1984 if (OutputBBIt == OutputBBs.end()) {
1985 Mismatch = true;
1986 break;
1987 }
1988
1989 BasicBlock *CompBB = VToB.second;
1990 BasicBlock *OutputBB = OutputBBIt->second;
1991 if (CompBB->size() - 1 != OutputBB->size()) {
1992 Mismatch = true;
1993 break;
1994 }
1995
1996 BasicBlock::iterator NIt = OutputBB->begin();
1997 for (Instruction &I : *CompBB) {
1998 if (isa<BranchInst>(&I))
1999 continue;
2000
2001 if (!I.isIdenticalTo(&(*NIt))) {
2002 Mismatch = true;
2003 break;
2004 }
2005
2006 NIt++;
2007 }
2008 }
2009
2010 if (!Mismatch)
2011 return MatchingNum;
2012
2013 MatchingNum++;
2014 }
2015
2016 return std::nullopt;
2017}
2018
2019/// Remove empty output blocks from the outlined region.
2020///
2021/// \param BlocksToPrune - Mapping of return values output blocks for the \p
2022/// Region.
2023/// \param Region - The OutlinableRegion we are analyzing.
2024static bool
2027 bool AllRemoved = true;
2028 Value *RetValueForBB;
2029 BasicBlock *NewBB;
2031 // Iterate over the output blocks created in the outlined section.
2032 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2033 RetValueForBB = VtoBB.first;
2034 NewBB = VtoBB.second;
2035
2036 // If there are no instructions, we remove it from the module, and also
2037 // mark the value for removal from the return value to output block mapping.
2038 if (NewBB->size() == 0) {
2039 NewBB->eraseFromParent();
2040 ToRemove.push_back(RetValueForBB);
2041 continue;
2042 }
2043
2044 // Mark that we could not remove all the blocks since they were not all
2045 // empty.
2046 AllRemoved = false;
2047 }
2048
2049 // Remove the return value from the mapping.
2050 for (Value *V : ToRemove)
2051 BlocksToPrune.erase(V);
2052
2053 // Mark the region as having the no output scheme.
2054 if (AllRemoved)
2055 Region.OutputBlockNum = -1;
2056
2057 return AllRemoved;
2058}
2059
2060/// For the outlined section, move needed the StoreInsts for the output
2061/// registers into their own block. Then, determine if there is a duplicate
2062/// output block already created.
2063///
2064/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2065/// \param [in] Region - The OutlinableRegion that is being analyzed.
2066/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2067/// placed in.
2068/// \param [in] EndBBs - the final blocks of the extracted function.
2069/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2070/// been replaced by a new output value.
2071/// \param [in,out] OutputStoreBBs - The existing output blocks.
2076 const DenseMap<Value *, Value *> &OutputMappings,
2077 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2078 // If none of the output blocks have any instructions, this means that we do
2079 // not have to determine if it matches any of the other output schemes, and we
2080 // don't have to do anything else.
2081 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2082 return;
2083
2084 // Determine is there is a duplicate set of blocks.
2085 std::optional<unsigned> MatchingBB =
2086 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2087
2088 // If there is, we remove the new output blocks. If it does not,
2089 // we add it to our list of sets of output blocks.
2090 if (MatchingBB) {
2091 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2092 << Region.ExtractedFunction << " to " << *MatchingBB);
2093
2094 Region.OutputBlockNum = *MatchingBB;
2095 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2096 VtoBB.second->eraseFromParent();
2097 return;
2098 }
2099
2100 Region.OutputBlockNum = OutputStoreBBs.size();
2101
2102 Value *RetValueForBB;
2103 BasicBlock *NewBB;
2104 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2105 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2106 RetValueForBB = VtoBB.first;
2107 NewBB = VtoBB.second;
2109 EndBBs.find(RetValueForBB);
2110 LLVM_DEBUG(dbgs() << "Create output block for region in"
2111 << Region.ExtractedFunction << " to "
2112 << *NewBB);
2113 BranchInst::Create(VBBIt->second, NewBB);
2114 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2115 }
2116}
2117
2118/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2119/// before creating a basic block for each \p NewMap, and inserting into the new
2120/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2121///
2122/// \param OldMap [in] - The mapping to base the new mapping off of.
2123/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2124/// \param ParentFunc [in] - The function to put the new basic block in.
2125/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2126/// by an index value.
2129 Function *ParentFunc, Twine BaseName) {
2130 unsigned Idx = 0;
2131 std::vector<Value *> SortedKeys;
2132
2133 getSortedConstantKeys(SortedKeys, OldMap);
2134
2135 for (Value *RetVal : SortedKeys) {
2137 ParentFunc->getContext(),
2138 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2139 ParentFunc);
2140 NewMap.insert(std::make_pair(RetVal, NewBB));
2141 }
2142}
2143
2144/// Create the switch statement for outlined function to differentiate between
2145/// all the output blocks.
2146///
2147/// For the outlined section, determine if an outlined block already exists that
2148/// matches the needed stores for the extracted section.
2149/// \param [in] M - The module we are outlining from.
2150/// \param [in] OG - The group of regions to be outlined.
2151/// \param [in] EndBBs - The final blocks of the extracted function.
2152/// \param [in,out] OutputStoreBBs - The existing output blocks.
2155 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2156 // We only need the switch statement if there is more than one store
2157 // combination, or there is more than one set of output blocks. The first
2158 // will occur when we store different sets of values for two different
2159 // regions. The second will occur when we have two outputs that are combined
2160 // in a PHINode outside of the region in one outlined instance, and are used
2161 // seaparately in another. This will create the same set of OutputGVNs, but
2162 // will generate two different output schemes.
2163 if (OG.OutputGVNCombinations.size() > 1) {
2164 Function *AggFunc = OG.OutlinedFunction;
2165 // Create a final block for each different return block.
2167 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2168
2169 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2170 std::pair<Value *, BasicBlock *> &OutputBlock =
2171 *OG.EndBBs.find(RetBlockPair.first);
2172 BasicBlock *ReturnBlock = RetBlockPair.second;
2173 BasicBlock *EndBB = OutputBlock.second;
2174 Instruction *Term = EndBB->getTerminator();
2175 // Move the return value to the final block instead of the original exit
2176 // stub.
2177 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2178 // Put the switch statement in the old end basic block for the function
2179 // with a fall through to the new return block.
2180 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2181 << OutputStoreBBs.size() << "\n");
2182 SwitchInst *SwitchI =
2183 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2184 ReturnBlock, OutputStoreBBs.size(), EndBB);
2185
2186 unsigned Idx = 0;
2187 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2189 OutputStoreBB.find(OutputBlock.first);
2190
2191 if (OSBBIt == OutputStoreBB.end())
2192 continue;
2193
2194 BasicBlock *BB = OSBBIt->second;
2195 SwitchI->addCase(
2196 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2197 Term = BB->getTerminator();
2198 Term->setSuccessor(0, ReturnBlock);
2199 Idx++;
2200 }
2201 }
2202 return;
2203 }
2204
2205 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2206
2207 // If there needs to be stores, move them from the output blocks to their
2208 // corresponding ending block. We do not check that the OutputGVNCombinations
2209 // is equal to 1 here since that could just been the case where there are 0
2210 // outputs. Instead, we check whether there is more than one set of output
2211 // blocks since this is the only case where we would have to move the
2212 // stores, and erase the extraneous blocks.
2213 if (OutputStoreBBs.size() == 1) {
2214 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2215 << *OG.OutlinedFunction << "\n");
2216 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2217 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2219 EndBBs.find(VBPair.first);
2220 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2221 BasicBlock *EndBB = EndBBIt->second;
2222 BasicBlock *OutputBB = VBPair.second;
2223 Instruction *Term = OutputBB->getTerminator();
2224 Term->eraseFromParent();
2225 Term = EndBB->getTerminator();
2226 moveBBContents(*OutputBB, *EndBB);
2227 Term->moveBefore(*EndBB, EndBB->end());
2228 OutputBB->eraseFromParent();
2229 }
2230 }
2231}
2232
2233/// Fill the new function that will serve as the replacement function for all of
2234/// the extracted regions of a certain structure from the first region in the
2235/// list of regions. Replace this first region's extracted function with the
2236/// new overall function.
2237///
2238/// \param [in] M - The module we are outlining from.
2239/// \param [in] CurrentGroup - The group of regions to be outlined.
2240/// \param [in,out] OutputStoreBBs - The output blocks for each different
2241/// set of stores needed for the different functions.
2242/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2243/// once outlining is complete.
2244/// \param [in] OutputMappings - Extracted functions to erase from module
2245/// once outlining is complete.
2247 Module &M, OutlinableGroup &CurrentGroup,
2248 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2249 std::vector<Function *> &FuncsToRemove,
2250 const DenseMap<Value *, Value *> &OutputMappings) {
2251 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2252
2253 // Move first extracted function's instructions into new function.
2254 LLVM_DEBUG(dbgs() << "Move instructions from "
2255 << *CurrentOS->ExtractedFunction << " to instruction "
2256 << *CurrentGroup.OutlinedFunction << "\n");
2258 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2259
2260 // Transfer the attributes from the function to the new function.
2261 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2262 CurrentGroup.OutlinedFunction->addFnAttr(A);
2263
2264 // Create a new set of output blocks for the first extracted function.
2266 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2267 CurrentGroup.OutlinedFunction, "output_block_0");
2268 CurrentOS->OutputBlockNum = 0;
2269
2270 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2271 replaceConstants(*CurrentOS);
2272
2273 // We first identify if any output blocks are empty, if they are we remove
2274 // them. We then create a branch instruction to the basic block to the return
2275 // block for the function for each non empty output block.
2276 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2277 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2278 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2280 CurrentGroup.EndBBs.find(VToBB.first);
2281 BasicBlock *EndBB = VBBIt->second;
2282 BranchInst::Create(EndBB, VToBB.second);
2283 OutputStoreBBs.back().insert(VToBB);
2284 }
2285 }
2286
2287 // Replace the call to the extracted function with the outlined function.
2288 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2289
2290 // We only delete the extracted functions at the end since we may need to
2291 // reference instructions contained in them for mapping purposes.
2292 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2293}
2294
2295void IROutliner::deduplicateExtractedSections(
2296 Module &M, OutlinableGroup &CurrentGroup,
2297 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2298 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2299
2300 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2301
2302 OutlinableRegion *CurrentOS;
2303
2304 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2305 OutputMappings);
2306
2307 std::vector<Value *> SortedKeys;
2308 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2309 CurrentOS = CurrentGroup.Regions[Idx];
2311 *CurrentOS->ExtractedFunction);
2312
2313 // Create a set of BasicBlocks, one for each return block, to hold the
2314 // needed store instructions.
2317 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2318 "output_block_" + Twine(static_cast<unsigned>(Idx)));
2319 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2320 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2321 CurrentGroup.EndBBs, OutputMappings,
2322 OutputStoreBBs);
2323
2324 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2325 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2326 }
2327
2328 // Create a switch statement to handle the different output schemes.
2329 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2330
2331 OutlinedFunctionNum++;
2332}
2333
2334/// Checks that the next instruction in the InstructionDataList matches the
2335/// next instruction in the module. If they do not, there could be the
2336/// possibility that extra code has been inserted, and we must ignore it.
2337///
2338/// \param ID - The IRInstructionData to check the next instruction of.
2339/// \returns true if the InstructionDataList and actual instruction match.
2341 // We check if there is a discrepancy between the InstructionDataList
2342 // and the actual next instruction in the module. If there is, it means
2343 // that an extra instruction was added, likely by the CodeExtractor.
2344
2345 // Since we do not have any similarity data about this particular
2346 // instruction, we cannot confidently outline it, and must discard this
2347 // candidate.
2348 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2349 Instruction *NextIDLInst = NextIDIt->Inst;
2350 Instruction *NextModuleInst = nullptr;
2351 if (!ID.Inst->isTerminator())
2352 NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2353 else if (NextIDLInst != nullptr)
2354 NextModuleInst =
2355 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2356
2357 if (NextIDLInst && NextIDLInst != NextModuleInst)
2358 return false;
2359
2360 return true;
2361}
2362
2363bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2364 const OutlinableRegion &Region) {
2365 IRSimilarityCandidate *IRSC = Region.Candidate;
2366 unsigned StartIdx = IRSC->getStartIdx();
2367 unsigned EndIdx = IRSC->getEndIdx();
2368
2369 // A check to make sure that we are not about to attempt to outline something
2370 // that has already been outlined.
2371 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2372 if (Outlined.contains(Idx))
2373 return false;
2374
2375 // We check if the recorded instruction matches the actual next instruction,
2376 // if it does not, we fix it in the InstructionDataList.
2377 if (!Region.Candidate->backInstruction()->isTerminator()) {
2378 Instruction *NewEndInst =
2379 Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2380 assert(NewEndInst && "Next instruction is a nullptr?");
2381 if (Region.Candidate->end()->Inst != NewEndInst) {
2382 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2383 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2384 IRInstructionData(*NewEndInst,
2385 InstructionClassifier.visit(*NewEndInst), *IDL);
2386
2387 // Insert the first IRInstructionData of the new region after the
2388 // last IRInstructionData of the IRSimilarityCandidate.
2389 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2390 }
2391 }
2392
2393 return none_of(*IRSC, [this](IRInstructionData &ID) {
2395 return true;
2396
2397 return !this->InstructionClassifier.visit(ID.Inst);
2398 });
2399}
2400
2401void IROutliner::pruneIncompatibleRegions(
2402 std::vector<IRSimilarityCandidate> &CandidateVec,
2403 OutlinableGroup &CurrentGroup) {
2404 bool PreviouslyOutlined;
2405
2406 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2407 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2408 const IRSimilarityCandidate &RHS) {
2409 return LHS.getStartIdx() < RHS.getStartIdx();
2410 });
2411
2412 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2413 // Since outlining a call and a branch instruction will be the same as only
2414 // outlinining a call instruction, we ignore it as a space saving.
2415 if (FirstCandidate.getLength() == 2) {
2416 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2417 isa<BranchInst>(FirstCandidate.back()->Inst))
2418 return;
2419 }
2420
2421 unsigned CurrentEndIdx = 0;
2422 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2423 PreviouslyOutlined = false;
2424 unsigned StartIdx = IRSC.getStartIdx();
2425 unsigned EndIdx = IRSC.getEndIdx();
2426 const Function &FnForCurrCand = *IRSC.getFunction();
2427
2428 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2429 if (Outlined.contains(Idx)) {
2430 PreviouslyOutlined = true;
2431 break;
2432 }
2433
2434 if (PreviouslyOutlined)
2435 continue;
2436
2437 // Check over the instructions, and if the basic block has its address
2438 // taken for use somewhere else, we do not outline that block.
2439 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2440 return ID.Inst->getParent()->hasAddressTaken();
2441 });
2442
2443 if (BBHasAddressTaken)
2444 continue;
2445
2446 if (FnForCurrCand.hasOptNone())
2447 continue;
2448
2449 if (FnForCurrCand.hasFnAttribute("nooutline")) {
2450 LLVM_DEBUG({
2451 dbgs() << "... Skipping function with nooutline attribute: "
2452 << FnForCurrCand.getName() << "\n";
2453 });
2454 continue;
2455 }
2456
2457 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2458 !OutlineFromLinkODRs)
2459 continue;
2460
2461 // Greedily prune out any regions that will overlap with already chosen
2462 // regions.
2463 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2464 continue;
2465
2466 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2468 return true;
2469
2470 return !this->InstructionClassifier.visit(ID.Inst);
2471 });
2472
2473 if (BadInst)
2474 continue;
2475
2476 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2477 OutlinableRegion(IRSC, CurrentGroup);
2478 CurrentGroup.Regions.push_back(OS);
2479
2480 CurrentEndIdx = EndIdx;
2481 }
2482}
2483
2485IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2486 InstructionCost RegionBenefit = 0;
2487 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2488 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2489 // We add the number of instructions in the region to the benefit as an
2490 // estimate as to how much will be removed.
2491 RegionBenefit += Region->getBenefit(TTI);
2492 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2493 << " saved instructions to overfall benefit.\n");
2494 }
2495
2496 return RegionBenefit;
2497}
2498
2499/// For the \p OutputCanon number passed in find the value represented by this
2500/// canonical number. If it is from a PHINode, we pick the first incoming
2501/// value and return that Value instead.
2502///
2503/// \param Region - The OutlinableRegion to get the Value from.
2504/// \param OutputCanon - The canonical number to find the Value from.
2505/// \returns The Value represented by a canonical number \p OutputCanon in \p
2506/// Region.
2508 unsigned OutputCanon) {
2509 OutlinableGroup &CurrentGroup = *Region.Parent;
2510 // If the value is greater than the value in the tracker, we have a
2511 // PHINode and will instead use one of the incoming values to find the
2512 // type.
2513 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2514 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2515 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2516 "Could not find GVN set for PHINode number!");
2517 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2518 OutputCanon = *It->second.second.begin();
2519 }
2520 std::optional<unsigned> OGVN =
2521 Region.Candidate->fromCanonicalNum(OutputCanon);
2522 assert(OGVN && "Could not find GVN for Canonical Number?");
2523 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2524 assert(OV && "Could not find value for GVN?");
2525 return *OV;
2526}
2527
2529IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2530 InstructionCost OverallCost = 0;
2531 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2532 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2533
2534 // Each output incurs a load after the call, so we add that to the cost.
2535 for (unsigned OutputCanon : Region->GVNStores) {
2536 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2537 InstructionCost LoadCost =
2538 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2540
2541 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2542 << " instructions to cost for output of type "
2543 << *V->getType() << "\n");
2544 OverallCost += LoadCost;
2545 }
2546 }
2547
2548 return OverallCost;
2549}
2550
2551/// Find the extra instructions needed to handle any output values for the
2552/// region.
2553///
2554/// \param [in] M - The Module to outline from.
2555/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2556/// \param [in] TTI - The TargetTransformInfo used to collect information for
2557/// new instruction costs.
2558/// \returns the additional cost to handle the outputs.
2560 OutlinableGroup &CurrentGroup,
2562 InstructionCost OutputCost = 0;
2563 unsigned NumOutputBranches = 0;
2564
2565 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2566 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2567 DenseSet<BasicBlock *> CandidateBlocks;
2568 Candidate.getBasicBlocks(CandidateBlocks);
2569
2570 // Count the number of different output branches that point to blocks outside
2571 // of the region.
2572 DenseSet<BasicBlock *> FoundBlocks;
2573 for (IRInstructionData &ID : Candidate) {
2574 if (!isa<BranchInst>(ID.Inst))
2575 continue;
2576
2577 for (Value *V : ID.OperVals) {
2578 BasicBlock *BB = static_cast<BasicBlock *>(V);
2579 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2580 NumOutputBranches++;
2581 }
2582 }
2583
2584 CurrentGroup.BranchesToOutside = NumOutputBranches;
2585
2586 for (const ArrayRef<unsigned> &OutputUse :
2587 CurrentGroup.OutputGVNCombinations) {
2588 for (unsigned OutputCanon : OutputUse) {
2589 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2590 InstructionCost StoreCost =
2591 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2593
2594 // An instruction cost is added for each store set that needs to occur for
2595 // various output combinations inside the function, plus a branch to
2596 // return to the exit block.
2597 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2598 << " instructions to cost for output of type "
2599 << *V->getType() << "\n");
2600 OutputCost += StoreCost * NumOutputBranches;
2601 }
2602
2603 InstructionCost BranchCost =
2605 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2606 << " a branch instruction\n");
2607 OutputCost += BranchCost * NumOutputBranches;
2608 }
2609
2610 // If there is more than one output scheme, we must have a comparison and
2611 // branch for each different item in the switch statement.
2612 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2613 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2614 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2617 InstructionCost BranchCost =
2619
2620 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2621 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2622
2623 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2624 << " instructions for each switch case for each different"
2625 << " output path in a function\n");
2626 OutputCost += TotalCost * NumOutputBranches;
2627 }
2628
2629 return OutputCost;
2630}
2631
2632void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2633 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2634 CurrentGroup.Benefit += RegionBenefit;
2635 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2636
2637 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2638 CurrentGroup.Cost += OutputReloadCost;
2639 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2640
2641 InstructionCost AverageRegionBenefit =
2642 RegionBenefit / CurrentGroup.Regions.size();
2643 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2644 unsigned NumRegions = CurrentGroup.Regions.size();
2646 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2647
2648 // We add one region to the cost once, to account for the instructions added
2649 // inside of the newly created function.
2650 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2651 << " instructions to cost for body of new function.\n");
2652 CurrentGroup.Cost += AverageRegionBenefit;
2653 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2654
2655 // For each argument, we must add an instruction for loading the argument
2656 // out of the register and into a value inside of the newly outlined function.
2657 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2658 << " instructions to cost for each argument in the new"
2659 << " function.\n");
2660 CurrentGroup.Cost +=
2661 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2662 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2663
2664 // Each argument needs to either be loaded into a register or onto the stack.
2665 // Some arguments will only be loaded into the stack once the argument
2666 // registers are filled.
2667 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2668 << " instructions to cost for each argument in the new"
2669 << " function " << NumRegions << " times for the "
2670 << "needed argument handling at the call site.\n");
2671 CurrentGroup.Cost +=
2672 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2673 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2674
2675 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2676 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2677}
2678
2679void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2680 ArrayRef<Value *> Outputs,
2681 LoadInst *LI) {
2682 // For and load instructions following the call
2683 Value *Operand = LI->getPointerOperand();
2684 std::optional<unsigned> OutputIdx;
2685 // Find if the operand it is an output register.
2686 for (unsigned ArgIdx = Region.NumExtractedInputs;
2687 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2688 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2689 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2690 break;
2691 }
2692 }
2693
2694 // If we found an output register, place a mapping of the new value
2695 // to the original in the mapping.
2696 if (!OutputIdx)
2697 return;
2698
2699 if (!OutputMappings.contains(Outputs[*OutputIdx])) {
2700 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2701 << *Outputs[*OutputIdx] << "\n");
2702 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2703 } else {
2704 Value *Orig = OutputMappings.find(Outputs[*OutputIdx])->second;
2705 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2706 << *Outputs[*OutputIdx] << "\n");
2707 OutputMappings.insert(std::make_pair(LI, Orig));
2708 }
2709}
2710
2711bool IROutliner::extractSection(OutlinableRegion &Region) {
2712 SetVector<Value *> ArgInputs, Outputs, SinkCands;
2713 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2714 BasicBlock *InitialStart = Region.StartBB;
2715 Function *OrigF = Region.StartBB->getParent();
2716 CodeExtractorAnalysisCache CEAC(*OrigF);
2717 Region.ExtractedFunction =
2718 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2719
2720 // If the extraction was successful, find the BasicBlock, and reassign the
2721 // OutlinableRegion blocks
2722 if (!Region.ExtractedFunction) {
2723 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2724 << "\n");
2725 Region.reattachCandidate();
2726 return false;
2727 }
2728
2729 // Get the block containing the called branch, and reassign the blocks as
2730 // necessary. If the original block still exists, it is because we ended on
2731 // a branch instruction, and so we move the contents into the block before
2732 // and assign the previous block correctly.
2733 User *InstAsUser = Region.ExtractedFunction->user_back();
2734 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2735 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2736 assert(Region.PrevBB && "PrevBB is nullptr?");
2737 if (Region.PrevBB == InitialStart) {
2738 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2739 Instruction *BI = NewPrev->getTerminator();
2740 BI->eraseFromParent();
2741 moveBBContents(*InitialStart, *NewPrev);
2742 Region.PrevBB = NewPrev;
2743 InitialStart->eraseFromParent();
2744 }
2745
2746 Region.StartBB = RewrittenBB;
2747 Region.EndBB = RewrittenBB;
2748
2749 // The sequences of outlinable regions has now changed. We must fix the
2750 // IRInstructionDataList for consistency. Although they may not be illegal
2751 // instructions, they should not be compared with anything else as they
2752 // should not be outlined in this round. So marking these as illegal is
2753 // allowed.
2754 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2755 Instruction *BeginRewritten = &*RewrittenBB->begin();
2756 Instruction *EndRewritten = &*RewrittenBB->begin();
2757 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2758 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2759 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2760 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2761
2762 // Insert the first IRInstructionData of the new region in front of the
2763 // first IRInstructionData of the IRSimilarityCandidate.
2764 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2765 // Insert the first IRInstructionData of the new region after the
2766 // last IRInstructionData of the IRSimilarityCandidate.
2767 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2768 // Remove the IRInstructionData from the IRSimilarityCandidate.
2769 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2770
2771 assert(RewrittenBB != nullptr &&
2772 "Could not find a predecessor after extraction!");
2773
2774 // Iterate over the new set of instructions to find the new call
2775 // instruction.
2776 for (Instruction &I : *RewrittenBB)
2777 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2778 if (Region.ExtractedFunction == CI->getCalledFunction())
2779 Region.Call = CI;
2780 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2781 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2782 Region.reattachCandidate();
2783 return true;
2784}
2785
2786unsigned IROutliner::doOutline(Module &M) {
2787 // Find the possible similarity sections.
2788 InstructionClassifier.EnableBranches = !DisableBranches;
2789 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2790 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2791
2792 IRSimilarityIdentifier &Identifier = getIRSI(M);
2793 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2794
2795 // Sort them by size of extracted sections
2796 unsigned OutlinedFunctionNum = 0;
2797 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2798 // to sort them by the potential number of instructions to be outlined
2799 if (SimilarityCandidates.size() > 1)
2800 llvm::stable_sort(SimilarityCandidates,
2801 [](const std::vector<IRSimilarityCandidate> &LHS,
2802 const std::vector<IRSimilarityCandidate> &RHS) {
2803 return LHS[0].getLength() * LHS.size() >
2804 RHS[0].getLength() * RHS.size();
2805 });
2806 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2807 // each of the following for loops to avoid making an allocator.
2808 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2809
2810 DenseSet<unsigned> NotSame;
2811 std::vector<OutlinableGroup *> NegativeCostGroups;
2812 std::vector<OutlinableRegion *> OutlinedRegions;
2813 // Iterate over the possible sets of similarity.
2814 unsigned PotentialGroupIdx = 0;
2815 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2816 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2817
2818 // Remove entries that were previously outlined
2819 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2820
2821 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2822 // trying to outlined since there is no compatible similar instance of this
2823 // code.
2824 if (CurrentGroup.Regions.size() < 2)
2825 continue;
2826
2827 // Determine if there are any values that are the same constant throughout
2828 // each section in the set.
2829 NotSame.clear();
2830 CurrentGroup.findSameConstants(NotSame);
2831
2832 if (CurrentGroup.IgnoreGroup)
2833 continue;
2834
2835 // Create a CodeExtractor for each outlinable region. Identify inputs and
2836 // outputs for each section using the code extractor and create the argument
2837 // types for the Aggregate Outlining Function.
2838 OutlinedRegions.clear();
2839 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2840 // Break the outlinable region out of its parent BasicBlock into its own
2841 // BasicBlocks (see function implementation).
2842 OS->splitCandidate();
2843
2844 // There's a chance that when the region is split, extra instructions are
2845 // added to the region. This makes the region no longer viable
2846 // to be split, so we ignore it for outlining.
2847 if (!OS->CandidateSplit)
2848 continue;
2849
2851 DenseSet<BasicBlock *> BlocksInRegion;
2852 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2853 OS->CE = new (ExtractorAllocator.Allocate())
2854 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2855 false, nullptr, "outlined");
2856 findAddInputsOutputs(M, *OS, NotSame);
2857 if (!OS->IgnoreRegion)
2858 OutlinedRegions.push_back(OS);
2859
2860 // We recombine the blocks together now that we have gathered all the
2861 // needed information.
2862 OS->reattachCandidate();
2863 }
2864
2865 CurrentGroup.Regions = std::move(OutlinedRegions);
2866
2867 if (CurrentGroup.Regions.empty())
2868 continue;
2869
2870 CurrentGroup.collectGVNStoreSets(M);
2871
2872 if (CostModel)
2873 findCostBenefit(M, CurrentGroup);
2874
2875 // If we are adhering to the cost model, skip those groups where the cost
2876 // outweighs the benefits.
2877 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2879 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2880 ORE.emit([&]() {
2881 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2882 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2883 C->frontInstruction());
2884 R << "did not outline "
2885 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2886 << " regions due to estimated increase of "
2887 << ore::NV("InstructionIncrease",
2888 CurrentGroup.Cost - CurrentGroup.Benefit)
2889 << " instructions at locations ";
2890 interleave(
2891 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2892 [&R](OutlinableRegion *Region) {
2893 R << ore::NV(
2894 "DebugLoc",
2895 Region->Candidate->frontInstruction()->getDebugLoc());
2896 },
2897 [&R]() { R << " "; });
2898 return R;
2899 });
2900 continue;
2901 }
2902
2903 NegativeCostGroups.push_back(&CurrentGroup);
2904 }
2905
2906 ExtractorAllocator.DestroyAll();
2907
2908 if (NegativeCostGroups.size() > 1)
2909 stable_sort(NegativeCostGroups,
2910 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2911 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2912 });
2913
2914 std::vector<Function *> FuncsToRemove;
2915 for (OutlinableGroup *CG : NegativeCostGroups) {
2916 OutlinableGroup &CurrentGroup = *CG;
2917
2918 OutlinedRegions.clear();
2919 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2920 // We check whether our region is compatible with what has already been
2921 // outlined, and whether we need to ignore this item.
2922 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2923 continue;
2924 OutlinedRegions.push_back(Region);
2925 }
2926
2927 if (OutlinedRegions.size() < 2)
2928 continue;
2929
2930 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2931 // we are still outlining enough regions to make up for the added cost.
2932 CurrentGroup.Regions = std::move(OutlinedRegions);
2933 if (CostModel) {
2934 CurrentGroup.Benefit = 0;
2935 CurrentGroup.Cost = 0;
2936 findCostBenefit(M, CurrentGroup);
2937 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2938 continue;
2939 }
2940 OutlinedRegions.clear();
2941 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2942 Region->splitCandidate();
2943 if (!Region->CandidateSplit)
2944 continue;
2945 OutlinedRegions.push_back(Region);
2946 }
2947
2948 CurrentGroup.Regions = std::move(OutlinedRegions);
2949 if (CurrentGroup.Regions.size() < 2) {
2950 for (OutlinableRegion *R : CurrentGroup.Regions)
2951 R->reattachCandidate();
2952 continue;
2953 }
2954
2955 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2956 << " and benefit " << CurrentGroup.Benefit << "\n");
2957
2958 // Create functions out of all the sections, and mark them as outlined.
2959 OutlinedRegions.clear();
2960 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2962 DenseSet<BasicBlock *> BlocksInRegion;
2963 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2964 OS->CE = new (ExtractorAllocator.Allocate())
2965 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2966 false, nullptr, "outlined");
2967 bool FunctionOutlined = extractSection(*OS);
2968 if (FunctionOutlined) {
2969 unsigned StartIdx = OS->Candidate->getStartIdx();
2970 unsigned EndIdx = OS->Candidate->getEndIdx();
2971 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2972 Outlined.insert(Idx);
2973
2974 OutlinedRegions.push_back(OS);
2975 }
2976 }
2977
2978 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2979 << " with benefit " << CurrentGroup.Benefit
2980 << " and cost " << CurrentGroup.Cost << "\n");
2981
2982 CurrentGroup.Regions = std::move(OutlinedRegions);
2983
2984 if (CurrentGroup.Regions.empty())
2985 continue;
2986
2988 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2989 ORE.emit([&]() {
2990 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2991 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2992 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2993 << " regions with decrease of "
2994 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2995 << " instructions at locations ";
2996 interleave(
2997 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2998 [&R](OutlinableRegion *Region) {
2999 R << ore::NV("DebugLoc",
3000 Region->Candidate->frontInstruction()->getDebugLoc());
3001 },
3002 [&R]() { R << " "; });
3003 return R;
3004 });
3005
3006 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
3007 OutlinedFunctionNum);
3008 }
3009
3010 for (Function *F : FuncsToRemove)
3011 F->eraseFromParent();
3012
3013 return OutlinedFunctionNum;
3014}
3015
3017 CostModel = !NoCostModel;
3018 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
3019
3020 return doOutline(M) > 0;
3021}
3022
3024 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3025
3026 std::function<TargetTransformInfo &(Function &)> GTTI =
3027 [&FAM](Function &F) -> TargetTransformInfo & {
3029 };
3030
3031 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3032 [&AM](Module &M) -> IRSimilarityIdentifier & {
3033 return AM.getResult<IRSimilarityAnalysis>(M);
3034 };
3035
3036 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3037 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3038 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3039 ORE.reset(new OptimizationRemarkEmitter(&F));
3040 return *ORE;
3041 };
3042
3043 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3044 return PreservedAnalyses::none();
3045 return PreservedAnalyses::all();
3046}
ReachingDefAnalysis InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:835
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:156
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:708
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:865
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs, std::vector< Function * > &FuncsToRemove, const DenseMap< Value *, Value * > &OutputMappings)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:549
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:165
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
Definition: IROutliner.cpp:220
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:936
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:810
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:620
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
Definition: IROutliner.cpp:530
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:463
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
Definition: IROutliner.cpp:780
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
AttributeSet getFnAttrs() const
The function attributes are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:662
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:497
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:279
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
size_t size() const
Definition: BasicBlock.h:469
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:485
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:631
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:45
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:84
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:256
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1800
This is an important base class in LLVM.
Definition: Constant.h:42
Debug location.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Class to represent function types.
Definition: DerivedTypes.h:103
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:653
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:172
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
const BasicBlock & back() const
Definition: Function.h:860
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
size_t size() const
Definition: Function.h:856
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:699
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:681
size_t arg_size() const
Definition: Function.h:899
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:519
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
bool isTerminator() const
Definition: Instruction.h:277
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
An instruction for reading from memory.
Definition: Instructions.h:174
Value * getPointerOperand()
Definition: Instructions.h:253
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:120
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
iterator begin()
Definition: RegionInfo.h:558
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
iterator end()
Definition: RegionInfo.h:559
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition: SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition: SetVector.h:84
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:215
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
User * user_back()
Definition: Value.h:407
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:542
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1096
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
bool erase(const ValueT &V)
Definition: DenseSet.h:101
An opaque object representing a hash code.
Definition: Hashing.h:75
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:202
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:165
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:127
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:2152
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:42
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:38
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:46
@ Other
Any other memory.
auto predecessors(const MachineBasicBlock *BB)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:593
void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
Definition: DebugInfo.cpp:422
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:72
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:135
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:78
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:98
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:598
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:131
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
Definition: IROutliner.cpp:123
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
Definition: IROutliner.cpp:119
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:74
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
Definition: IROutliner.cpp:124
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:86
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:89
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:114
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:82
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:102
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:93
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:80
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:110
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:605
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:128
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:106
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:486
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:248
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:186
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:198
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:375