LLVM 20.0.0git
IROutliner.h
Go to the documentation of this file.
1//===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// The interface file for the IROutliner which is used by the IROutliner Pass.
11//
12// The outliner uses the IRSimilarityIdentifier to identify the similar regions
13// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
14// whether it will provide code size reduction. Each region is extracted using
15// the code extractor. These extracted functions are consolidated into a single
16// function and called from the extracted call site.
17//
18// For example:
19// \code
20// %1 = add i32 %a, %b
21// %2 = add i32 %b, %a
22// %3 = add i32 %b, %a
23// %4 = add i32 %a, %b
24// \endcode
25// would become function
26// \code
27// define internal void outlined_ir_function(i32 %0, i32 %1) {
28// %1 = add i32 %0, %1
29// %2 = add i32 %1, %0
30// ret void
31// }
32// \endcode
33// with calls:
34// \code
35// call void outlined_ir_function(i32 %a, i32 %b)
36// call void outlined_ir_function(i32 %b, i32 %a)
37// \endcode
38//
39//===----------------------------------------------------------------------===//
40
41#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43
45#include "llvm/IR/PassManager.h"
48
49struct OutlinableGroup;
50
51namespace llvm {
52using namespace CallingConv;
53using namespace IRSimilarity;
54
55class Module;
56class TargetTransformInfo;
57class OptimizationRemarkEmitter;
58
59/// The OutlinableRegion holds all the information for a specific region, or
60/// sequence of instructions. This includes what values need to be hoisted to
61/// arguments from the extracted function, inputs and outputs to the region, and
62/// mapping from the extracted function arguments to overall function arguments.
64 /// Describes the region of code.
66
67 /// If this region is outlined, the front and back IRInstructionData could
68 /// potentially become invalidated if the only new instruction is a call.
69 /// This ensures that we replace in the instruction in the IRInstructionData.
72
73 /// The number of extracted inputs from the CodeExtractor.
74 unsigned NumExtractedInputs = 0;
75
76 /// The corresponding BasicBlock with the appropriate stores for this
77 /// OutlinableRegion in the overall function.
78 unsigned OutputBlockNum = -1;
79
80 /// Mapping the extracted argument number to the argument number in the
81 /// overall function. Since there will be inputs, such as elevated constants
82 /// that are not the same in each region in a SimilarityGroup, or values that
83 /// cannot be sunk into the extracted section in every region, we must keep
84 /// track of which extracted argument maps to which overall argument.
87
88 /// Values in the outlined functions will often be replaced by arguments. When
89 /// finding corresponding values from one region to another, the found value
90 /// will be the value the argument previously replaced. This structure maps
91 /// any replaced values for the region to the aggregate aggregate argument
92 /// in the overall function.
94
95 /// Marks whether we need to change the order of the arguments when mapping
96 /// the old extracted function call to the new aggregate outlined function
97 /// call.
98 bool ChangedArgOrder = false;
99
100 /// Marks whether this region ends in a branch, there is special handling
101 /// required for the following basic blocks in this case.
102 bool EndsInBranch = false;
103
104 /// The PHIBlocks with their corresponding return block based on the return
105 /// value as the key.
107
108 /// Mapping of the argument number in the deduplicated function
109 /// to a given constant, which is used when creating the arguments to the call
110 /// to the newly created deduplicated function. This is handled separately
111 /// since the CodeExtractor does not recognize constants.
113
114 /// The global value numbers that are used as outputs for this section. Once
115 /// extracted, each output will be stored to an output register. This
116 /// documents the global value numbers that are used in this pattern.
118
119 /// Used to create an outlined function.
120 CodeExtractor *CE = nullptr;
121
122 /// The call site of the extracted region.
123 CallInst *Call = nullptr;
124
125 /// The function for the extracted region.
127
128 /// Flag for whether we have split out the IRSimilarityCanidate. That is,
129 /// make the region contained the IRSimilarityCandidate its own BasicBlock.
130 bool CandidateSplit = false;
131
132 /// Flag for whether we should not consider this region for extraction.
133 bool IgnoreRegion = false;
134
135 /// The BasicBlock that is before the start of the region BasicBlock,
136 /// only defined when the region has been split.
137 BasicBlock *PrevBB = nullptr;
138
139 /// The BasicBlock that contains the starting instruction of the region.
140 BasicBlock *StartBB = nullptr;
141
142 /// The BasicBlock that contains the ending instruction of the region.
143 BasicBlock *EndBB = nullptr;
144
145 /// The BasicBlock that is after the start of the region BasicBlock,
146 /// only defined when the region has been split.
148
149 /// The Outlinable Group that contains this region and structurally similar
150 /// regions to this region.
152
154 : Candidate(&C), Parent(&Group) {
155 StartBB = C.getStartBB();
156 EndBB = C.getEndBB();
157 }
158
159 /// For the contained region, split the parent BasicBlock at the starting and
160 /// ending instructions of the contained IRSimilarityCandidate.
161 void splitCandidate();
162
163 /// For the contained region, reattach the BasicBlock at the starting and
164 /// ending instructions of the contained IRSimilarityCandidate, or if the
165 /// function has been extracted, the start and end of the BasicBlock
166 /// containing the called function.
167 void reattachCandidate();
168
169 /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
170 ///
171 /// \param Other [in] - The OutlinableRegion to find the corresponding Value
172 /// in.
173 /// \param V [in] - The Value to look for in the other region.
174 /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
176
177 /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
178 ///
179 /// \param Other [in] - The OutlinableRegion to find the corresponding
180 /// BasicBlock in.
181 /// \param BB [in] - The BasicBlock to look for in the other region.
182 /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
184 BasicBlock *BB);
185
186 /// Get the size of the code removed from the region.
187 ///
188 /// \param [in] TTI - The TargetTransformInfo for the parent function.
189 /// \returns the code size of the region
191};
192
193/// This class is a pass that identifies similarity in a Module, extracts
194/// instances of the similarity, and then consolidating the similar regions
195/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
196/// to identify the similar regions of code, and then extracts the similar
197/// sections into a single function. See the above for an example as to
198/// how code is extracted and consolidated into a single function.
200public:
204 : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
205
206 // Check that the DenseMap implementation has not changed.
208 "DenseMapInfo<unsigned>'s empty key isn't -1!");
210 "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
211 }
212 bool run(Module &M);
213
214private:
215 /// Find repeated similar code sequences in \p M and outline them into new
216 /// Functions.
217 ///
218 /// \param [in] M - The module to outline from.
219 /// \returns The number of Functions created.
220 unsigned doOutline(Module &M);
221
222 /// Check whether an OutlinableRegion is incompatible with code already
223 /// outlined. OutlinableRegions are incomptaible when there are overlapping
224 /// instructions, or code that has not been recorded has been added to the
225 /// instructions.
226 ///
227 /// \param [in] Region - The OutlinableRegion to check for conflicts with
228 /// already outlined code.
229 /// \returns whether the region can safely be outlined.
230 bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
231
232 /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
233 /// instructions contained in a previously outlined region and put the
234 /// remaining regions in \p CurrentGroup.
235 ///
236 /// \param [in] CandidateVec - List of similarity candidates for regions with
237 /// the same similarity structure.
238 /// \param [in,out] CurrentGroup - Contains the potential sections to
239 /// be outlined.
240 void
241 pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
242 OutlinableGroup &CurrentGroup);
243
244 /// Create the function based on the overall types found in the current
245 /// regions being outlined.
246 ///
247 /// \param M - The module to outline from.
248 /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
249 /// \param [in] FunctionNameSuffix - How many functions have we previously
250 /// created.
251 /// \returns the newly created function.
252 Function *createFunction(Module &M, OutlinableGroup &CG,
253 unsigned FunctionNameSuffix);
254
255 /// Identify the needed extracted inputs in a section, and add to the overall
256 /// function if needed.
257 ///
258 /// \param [in] M - The module to outline from.
259 /// \param [in,out] Region - The region to be extracted.
260 /// \param [in] NotSame - The global value numbers of the Values in the region
261 /// that do not have the same Constant in each strucutrally similar region.
262 void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
263 DenseSet<unsigned> &NotSame);
264
265 /// Find the number of instructions that will be removed by extracting the
266 /// OutlinableRegions in \p CurrentGroup.
267 ///
268 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
269 /// analyzed.
270 /// \returns the number of outlined instructions across all regions.
271 InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
272
273 /// Find the number of instructions that will be added by reloading arguments.
274 ///
275 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
276 /// analyzed.
277 /// \returns the number of added reload instructions across all regions.
278 InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
279
280 /// Find the cost and the benefit of \p CurrentGroup and save it back to
281 /// \p CurrentGroup.
282 ///
283 /// \param [in] M - The module being analyzed
284 /// \param [in,out] CurrentGroup - The overall outlined section
285 void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
286
287 /// Update the output mapping based on the load instruction, and the outputs
288 /// of the extracted function.
289 ///
290 /// \param Region - The region extracted
291 /// \param Outputs - The outputs from the extracted function.
292 /// \param LI - The load instruction used to update the mapping.
293 void updateOutputMapping(OutlinableRegion &Region,
294 ArrayRef<Value *> Outputs, LoadInst *LI);
295
296 /// Extract \p Region into its own function.
297 ///
298 /// \param [in] Region - The region to be extracted into its own function.
299 /// \returns True if it was successfully outlined.
300 bool extractSection(OutlinableRegion &Region);
301
302 /// For the similarities found, and the extracted sections, create a single
303 /// outlined function with appropriate output blocks as necessary.
304 ///
305 /// \param [in] M - The module to outline from
306 /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
307 /// \param [in,out] FuncsToRemove - List of functions to remove from the
308 /// module after outlining is completed.
309 /// \param [in,out] OutlinedFunctionNum - the number of new outlined
310 /// functions.
311 void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
312 std::vector<Function *> &FuncsToRemove,
313 unsigned &OutlinedFunctionNum);
314
315 /// If true, enables us to outline from functions that have LinkOnceFromODR
316 /// linkages.
317 bool OutlineFromLinkODRs = false;
318
319 /// If false, we do not worry if the cost is greater than the benefit. This
320 /// is for debugging and testing, so that we can test small cases to ensure
321 /// that the outlining is being done correctly.
322 bool CostModel = true;
323
324 /// The set of outlined Instructions, identified by their location in the
325 /// sequential ordering of instructions in a Module.
326 DenseSet<unsigned> Outlined;
327
328 /// TargetTransformInfo lambda for target specific information.
330
331 /// A mapping from newly created reloaded output values to the original value.
332 /// If an value is replace by an output from an outlined region, this maps
333 /// that Value, back to its original Value.
334 DenseMap<Value *, Value *> OutputMappings;
335
336 /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
338
339 /// The optimization remark emitter for the pass.
341
342 /// The memory allocator used to allocate the CodeExtractors.
344
345 /// The memory allocator used to allocate the OutlinableRegions.
347
348 /// The memory allocator used to allocate new IRInstructionData.
350
351 /// Custom InstVisitor to classify different instructions for whether it can
352 /// be analyzed for similarity. This is needed as there may be instruction we
353 /// can identify as having similarity, but are more complicated to outline.
354 struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
355 InstructionAllowed() = default;
356
357 bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
358 bool visitPHINode(PHINode &PN) { return EnableBranches; }
359 // TODO: Handle allocas.
360 bool visitAllocaInst(AllocaInst &AI) { return false; }
361 // VAArg instructions are not allowed since this could cause difficulty when
362 // differentiating between different sets of variable instructions in
363 // the deduplicated outlined regions.
364 bool visitVAArgInst(VAArgInst &VI) { return false; }
365 // We exclude all exception handling cases since they are so context
366 // dependent.
367 bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
368 bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
369 // DebugInfo should be included in the regions, but should not be
370 // analyzed for similarity as it has no bearing on the outcome of the
371 // program.
372 bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
373 // TODO: Handle specific intrinsics individually from those that can be
374 // handled.
375 bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
376 // We only handle CallInsts that are not indirect, since we cannot guarantee
377 // that they have a name in these cases.
378 bool visitCallInst(CallInst &CI) {
379 Function *F = CI.getCalledFunction();
380 bool IsIndirectCall = CI.isIndirectCall();
381 if (IsIndirectCall && !EnableIndirectCalls)
382 return false;
383 if (!F && !IsIndirectCall)
384 return false;
385 // Returning twice can cause issues with the state of the function call
386 // that were not expected when the function was used, so we do not include
387 // the call in outlined functions.
388 if (CI.canReturnTwice())
389 return false;
390 // TODO: Update the outliner to capture whether the outlined function
391 // needs these extra attributes.
392
393 // Functions marked with the swifttailcc and tailcc calling conventions
394 // require special handling when outlining musttail functions. The
395 // calling convention must be passed down to the outlined function as
396 // well. Further, there is special handling for musttail calls as well,
397 // requiring a return call directly after. For now, the outliner does not
398 // support this.
399 bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
400 CI.getCallingConv() == CallingConv::Tail;
401 if (IsTailCC && !EnableMustTailCalls)
402 return false;
403 if (CI.isMustTailCall() && !EnableMustTailCalls)
404 return false;
405 // The outliner can only handle musttail items if it is also accompanied
406 // by the tailcc or swifttailcc calling convention.
407 if (CI.isMustTailCall() && !IsTailCC)
408 return false;
409 return true;
410 }
411 // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
412 // the outlined region, and then returned as an output, this will have to be
413 // handled differently.
414 bool visitFreezeInst(FreezeInst &CI) { return false; }
415 // TODO: We do not current handle similarity that changes the control flow.
416 bool visitInvokeInst(InvokeInst &II) { return false; }
417 // TODO: We do not current handle similarity that changes the control flow.
418 bool visitCallBrInst(CallBrInst &CBI) { return false; }
419 // TODO: Handle interblock similarity.
420 bool visitTerminator(Instruction &I) { return false; }
421 bool visitInstruction(Instruction &I) { return true; }
422
423 // The flag variable that marks whether we should allow branch instructions
424 // to be outlined.
425 bool EnableBranches = false;
426
427 // The flag variable that marks whether we should allow indirect calls
428 // to be outlined.
429 bool EnableIndirectCalls = true;
430
431 // The flag variable that marks whether we should allow intrinsics
432 // instructions to be outlined.
433 bool EnableIntrinsics = false;
434
435 // The flag variable that marks whether we should allow musttail calls.
436 bool EnableMustTailCalls = false;
437 };
438
439 /// A InstVisitor used to exclude certain instructions from being outlined.
440 InstructionAllowed InstructionClassifier;
441};
442
443/// Pass to outline similar regions.
444class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
445public:
447};
448
449} // end namespace llvm
450
451#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Check Debug Module
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Conditional or Unconditional Branch instruction.
This class represents a function call, abstracting a target machine's calling convention.
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Pass to outline similar regions.
Definition: IROutliner.h:444
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
IROutliner(function_ref< TargetTransformInfo &(Function &)> GTTI, function_ref< IRSimilarityIdentifier &(Module &)> GIRSI, function_ref< OptimizationRemarkEmitter &(Function &)> GORE)
Definition: IROutliner.h:201
bool run(Module &M)
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Base class for instruction visitors.
Definition: InstVisitor.h:78
An instruction for reading from memory.
Definition: Instructions.h:176
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:389
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
Definition: Value.h:74
An efficient, type-erasing, non-owning reference to a callable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Other
Any other memory.
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:72
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
This provides the utilities for hashing an Instruction to an unsigned integer.
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
CodeExtractor * CE
Used to create an outlined function.
Definition: IROutliner.h:120
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:486
DenseMap< unsigned, Constant * > AggArgToConstant
Mapping of the argument number in the deduplicated function to a given constant, which is used when c...
Definition: IROutliner.h:112
DenseMap< unsigned, unsigned > AggArgToExtracted
Definition: IROutliner.h:86
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
IRInstructionData * NewFront
If this region is outlined, the front and back IRInstructionData could potentially become invalidated...
Definition: IROutliner.h:70
SmallVector< unsigned, 4 > GVNStores
The global value numbers that are used as outputs for this section.
Definition: IROutliner.h:117
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition: IROutliner.h:133
bool ChangedArgOrder
Marks whether we need to change the order of the arguments when mapping the old extracted function ca...
Definition: IROutliner.h:98
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:248
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:186
DenseMap< unsigned, unsigned > ExtractedArgToAgg
Mapping the extracted argument number to the argument number in the overall function.
Definition: IROutliner.h:85
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:198
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
unsigned NumExtractedInputs
The number of extracted inputs from the CodeExtractor.
Definition: IROutliner.h:74
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.h:106
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
OutlinableGroup * Parent
The Outlinable Group that contains this region and structurally similar regions to this region.
Definition: IROutliner.h:151
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition: IROutliner.h:153
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
IRInstructionData * NewBack
Definition: IROutliner.h:71
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:375
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69