LLVM 23.0.0git
CodeExtractor.h
Go to the documentation of this file.
1//===- Transform/Utils/CodeExtractor.h - Code extraction util ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// A utility to support extracting code from one function into its own
10// stand-alone function.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
15#define LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SetVector.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
23#include <limits>
24
25namespace llvm {
26
27template <typename PtrType> class SmallPtrSetImpl;
29class AllocaInst;
30class BlockFrequency;
33class AssumptionCache;
34class CallInst;
35class DominatorTree;
36class Function;
37class Instruction;
38class Module;
39class Type;
40class Value;
41class StructType;
42
43/// A cache for the CodeExtractor analysis. The operation \ref
44/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this
45/// object. This object should conservatively be considered invalid if any
46/// other mutating operations on the IR occur.
47///
48/// Constructing this object is O(n) in the size of the function.
50 /// The allocas in the function.
52
53 /// Base memory addresses of load/store instructions, grouped by block.
55
56 /// Blocks which contain instructions which may have unknown side-effects
57 /// on memory.
58 DenseSet<BasicBlock *> SideEffectingBlocks;
59
60 void findSideEffectInfoForBlock(BasicBlock &BB);
61
62public:
64
65 /// Get the allocas in the function at the time the analysis was created.
66 /// Note that some of these allocas may no longer be present in the function,
67 /// due to \ref CodeExtractor::extractCodeRegion.
68 ArrayRef<AllocaInst *> getAllocas() const { return Allocas; }
69
70 /// Check whether \p BB contains an instruction thought to load from, store
71 /// to, or otherwise clobber the alloca \p Addr.
73 AllocaInst *Addr) const;
74};
75
76/// Utility class for extracting code into a new function.
77///
78/// This utility provides a simple interface for extracting some sequence of
79/// code into its own function, replacing it with a call to that function. It
80/// also provides various methods to query about the nature and result of such a
81/// transformation.
82///
83/// The rough algorithm used is:
84/// 1) Find both the inputs and outputs for the extracted region.
85/// 2) Pass the inputs as arguments, remapping them within the extracted
86/// function to arguments.
87/// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas as
88/// arguments, and inserting stores to the arguments for any scalars.
90 using ValueSet = SetVector<Value *>;
91
92 // Various bits of state computed on construction.
93 DominatorTree *const DT;
94 const bool AggregateArgs;
98
99 /// A block outside of the extraction set where any intermediate allocations
100 /// will be placed inside. If this is null, allocations will be placed in the
101 /// entry block of the function.
102 BasicBlock *AllocationBlock;
103
104 /// A set of blocks outside of the extraction set where deallocations for
105 /// intermediate allocations should be placed. Not used for automatically
106 /// deallocated memory (e.g. `alloca`), which is the default.
107 ///
108 /// If it is empty and needed, the end of the replacement basic block will be
109 /// used to place deallocations.
110 SmallVector<BasicBlock *> DeallocationBlocks;
111
112 /// If true, varargs functions can be extracted.
113 bool AllowVarArgs;
114
115 /// Bits of intermediate state computed at various phases of extraction.
117
118 /// Lists of blocks that are branched from the code region to be extracted,
119 /// also called the exit blocks. Each block is contained at most once. Its
120 /// order defines the return value of the extracted function.
121 ///
122 /// When there is just one (or no) exit block, the return value is irrelevant.
123 ///
124 /// When there are exactly two exit blocks, the extracted function returns a
125 /// boolean. For ExtractedFuncRetVals[0], it returns 'true'. For
126 /// ExtractedFuncRetVals[1] it returns 'false'.
127 /// NOTE: Since a boolean is represented by i1, ExtractedFuncRetVals[0]
128 /// returns 1 and ExtractedFuncRetVals[1] returns 0, which opposite of
129 /// the regular pattern below.
130 ///
131 /// When there are 3 or more exit blocks, leaving the extracted function via
132 /// the first block it returns 0. When leaving via the second entry it returns
133 /// 1, etc.
134 SmallVector<BasicBlock *> ExtractedFuncRetVals;
135
136 /// Suffix to use when creating extracted function (appended to the original
137 /// function name + "."). If empty, the default is to use the entry block
138 /// label, if non-empty, otherwise "extracted".
139 std::string Suffix;
140
141 /// If true, the outlined function has aggregate argument in zero address
142 /// space.
143 bool ArgsInZeroAddressSpace;
144
145 // If true, the outlined function always return void even when there is only
146 // one output.
147 bool VoidReturnWithSingleOutput;
148
149 // If set, the return value of the outline function.
150 Value *FuncRetVal = nullptr;
151
152public:
153 /// Create a code extractor for a sequence of blocks.
154 ///
155 /// Given a sequence of basic blocks where the first block in the sequence
156 /// dominates the rest, prepare a code extractor object for pulling this
157 /// sequence out into its new function. When a DominatorTree is also given,
158 /// extra checking and transformations are enabled. If AllowVarArgs is true,
159 /// vararg functions can be extracted. This is safe, if all vararg handling
160 /// code is extracted, including vastart. If AllowAlloca is true, then
161 /// extraction of blocks containing alloca instructions would be possible,
162 /// however code extractor won't validate whether extraction is legal. Any new
163 /// allocations will be placed in the AllocationBlock, unless it is null, in
164 /// which case it will be placed in the entry block of the function from which
165 /// the code is being extracted. Explicit deallocations for the aforementioned
166 /// allocations will be placed, if needed, in all blocks in DeallocationBlocks
167 /// or the end of the replacement block. If ArgsInZeroAddressSpace param is
168 /// set to true, then the aggregate param pointer of the outlined function is
169 /// declared in zero address space. If VoidReturnWithSingleOutput is set to
170 /// true, then the return type of the outlined function is set void even if
171 /// there is only one output.
173 bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
174 BranchProbabilityInfo *BPI = nullptr,
175 AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
176 bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr,
177 ArrayRef<BasicBlock *> DeallocationBlocks = {},
178 std::string Suffix = "", bool ArgsInZeroAddressSpace = false,
179 bool VoidReturnWithSingleOutput = true);
180
181 virtual ~CodeExtractor() = default;
182
183 /// Perform the extraction, returning the new function.
184 ///
185 /// Returns zero when called on a CodeExtractor instance where isEligible
186 /// returns false.
188
189 /// Perform the extraction, returning the new function and providing an
190 /// interface to see what was categorized as inputs and outputs.
191 ///
192 /// \param CEAC - Cache to speed up operations for the CodeExtractor when
193 /// hoisting, and extracting lifetime values and assumes.
194 /// \param Inputs [in/out] - filled with values marked as inputs to the newly
195 /// outlined function.
196 /// \param Outputs [out] - filled with values marked as outputs to the newly
197 /// outlined function.
198 /// \returns zero when called on a CodeExtractor instance where isEligible
199 /// returns false.
201 ValueSet &Inputs, ValueSet &Outputs);
202
203 /// Verify that assumption cache isn't stale after a region is extracted.
204 /// Returns true when verifier finds errors. AssumptionCache is passed as
205 /// parameter to make this function stateless.
206 static bool verifyAssumptionCache(const Function &OldFunc,
207 const Function &NewFunc,
208 AssumptionCache *AC);
209
210 /// Test whether this code extractor is eligible.
211 ///
212 /// Based on the blocks used when constructing the code extractor, determine
213 /// whether it is eligible for extraction.
214 ///
215 /// Checks that varargs handling (with vastart and vaend) is only done in the
216 /// outlined blocks.
217 bool isEligible() const;
218
219 /// Compute the set of input values and output values for the code.
220 ///
221 /// These can be used either when performing the extraction or to evaluate the
222 /// expected size of a call to the extracted function. Note that this work
223 /// cannot be cached between the two as once we decide to extract a code
224 /// sequence, that sequence is modified, including changing these sets, before
225 /// extraction occurs. These modifications won't have any significant impact
226 /// on the cost however.
227 void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
228 const ValueSet &Allocas,
229 bool CollectGlobalInputs = false);
230
231 /// Check if life time marker nodes can be hoisted/sunk into the outline
232 /// region.
233 ///
234 /// Returns true if it is safe to do the code motion.
235 bool
237 Instruction *AllocaAddr) const;
238
239 /// Find the set of allocas whose life ranges are contained within the
240 /// outlined region.
241 ///
242 /// Allocas which have life_time markers contained in the outlined region
243 /// should be pushed to the outlined function. The address bitcasts that are
244 /// used by the lifetime markers are also candidates for shrink-wrapping. The
245 /// instructions that need to be sunk are collected in 'Allocas'.
246 void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands,
247 ValueSet &HoistCands, BasicBlock *&ExitBlock) const;
248
249 /// Find or create a block within the outline region for placing hoisted code.
250 ///
251 /// CommonExitBlock is block outside the outline region. It is the common
252 /// successor of blocks inside the region. If there exists a single block
253 /// inside the region that is the predecessor of CommonExitBlock, that block
254 /// will be returned. Otherwise CommonExitBlock will be split and the original
255 /// block will be added to the outline region.
257
258 /// Exclude a value from aggregate argument passing when extracting a code
259 /// region, passing it instead as a scalar.
261
262protected:
263 /// Allocate an intermediate variable at the specified point.
265 Type *VarType, const Twine &Name = Twine(""),
266 AddrSpaceCastInst **CastedAlloc = nullptr);
267
268 /// Deallocate a previously-allocated intermediate variable at the specified
269 /// point.
271 Value *Var, Type *VarType);
272
273private:
274 struct LifetimeMarkerInfo {
275 bool SinkLifeStart = false;
276 bool HoistLifeEnd = false;
277 Instruction *LifeStart = nullptr;
278 Instruction *LifeEnd = nullptr;
279 };
280
281 ValueSet ExcludeArgsFromAggregate;
282
283 LifetimeMarkerInfo getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
284 Instruction *Addr,
285 BasicBlock *ExitBlock) const;
286
287 /// Updates the list of SwitchCases (corresponding to exit blocks) after
288 /// changes of the control flow or the Blocks list.
289 void computeExtractedFuncRetVals();
290
291 /// Return the type used for the return code of the extracted function to
292 /// indicate which exit block to jump to.
293 Type *getSwitchType();
294
295 void severSplitPHINodesOfEntry(BasicBlock *&Header);
296 void severSplitPHINodesOfExits();
297 void splitReturnBlocks();
298
299 void moveCodeToFunction(Function *newFunction);
300
301 void calculateNewCallTerminatorWeights(
302 BasicBlock *CodeReplacer,
305
306 /// Normalizes the control flow of the extracted regions, such as ensuring
307 /// that the extracted region does not contain a return instruction.
308 void normalizeCFGForExtraction(BasicBlock *&header);
309
310 /// Generates the function declaration for the function containing the
311 /// extracted code.
312 Function *
313 constructFunctionDeclaration(const ValueSet &inputs, const ValueSet &outputs,
314 BlockFrequency EntryFreq, const Twine &Name,
315 ValueSet &StructValues, StructType *&StructTy);
316
317 /// Generates the code for the extracted function. That is: a prolog, the
318 /// moved or copied code from the original function, and epilogs for each
319 /// exit.
320 void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs,
321 const ValueSet &StructValues, Function *newFunction,
322 StructType *StructArgTy, BasicBlock *header,
323 const ValueSet &SinkingCands,
324 SmallVectorImpl<Value *> &NewValues);
325
326 /// Generates a Basic Block that calls the extracted function.
327 CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs,
328 const ValueSet &StructValues,
329 Function *newFunction, StructType *StructArgTy,
330 Function *oldFunction, BasicBlock *ReplIP,
331 BlockFrequency EntryFreq,
332 ArrayRef<Value *> LifetimesStart,
333 std::vector<Value *> &Reloads);
334
335 /// Connects the basic block containing the call to the extracted function
336 /// into the original function's control flow.
337 void
338 insertReplacerCall(Function *oldFunction, BasicBlock *header,
339 CallInst *ReplacerCall, const ValueSet &outputs,
340 ArrayRef<Value *> Reloads,
341 const DenseMap<BasicBlock *, BlockFrequency> &ExitWeights);
342};
343
344} // end namespace llvm
345
346#endif // LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
#define LLVM_ABI
Definition Compiler.h:213
This file defines the DenseMap class.
#define F(x, y, z)
Definition MD5.cpp:54
This file implements a set that has insertion order iteration characteristics.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis providing branch probability information.
This class represents a function call, abstracting a target machine's calling convention.
A cache for the CodeExtractor analysis.
ArrayRef< AllocaInst * > getAllocas() const
Get the allocas in the function at the time the analysis was created.
LLVM_ABI CodeExtractorAnalysisCache(Function &F)
LLVM_ABI bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const
Check whether BB contains an instruction thought to load from, store to, or otherwise clobber the all...
BasicBlock * findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock)
Find or create a block within the outline region for placing hoisted code.
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas, bool CollectGlobalInputs=false)
Compute the set of input values and output values for the code.
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
virtual Instruction * allocateVar(IRBuilder<>::InsertPoint AllocaIP, Type *VarType, const Twine &Name=Twine(""), AddrSpaceCastInst **CastedAlloc=nullptr)
Allocate an intermediate variable at the specified point.
CodeExtractor(ArrayRef< BasicBlock * > BBs, DominatorTree *DT=nullptr, bool AggregateArgs=false, BlockFrequencyInfo *BFI=nullptr, BranchProbabilityInfo *BPI=nullptr, AssumptionCache *AC=nullptr, bool AllowVarArgs=false, bool AllowAlloca=false, BasicBlock *AllocationBlock=nullptr, ArrayRef< BasicBlock * > DeallocationBlocks={}, std::string Suffix="", bool ArgsInZeroAddressSpace=false, bool VoidReturnWithSingleOutput=true)
Create a code extractor for a sequence of blocks.
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
static bool verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC)
Verify that assumption cache isn't stale after a region is extracted.
virtual ~CodeExtractor()=default
virtual Instruction * deallocateVar(IRBuilder<>::InsertPoint DeallocIP, Value *Var, Type *VarType)
Deallocate a previously-allocated intermediate variable at the specified point.
bool isEligible() const
Test whether this code extractor is eligible.
void excludeArgFromAggregate(Value *Arg)
Exclude a value from aggregate argument passing when extracting a code region, passing it instead as ...
bool isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *AllocaAddr) const
Check if life time marker nodes can be hoisted/sunk into the outline region.
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
InsertPoint - A saved insertion point.
Definition IRBuilder.h:298
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A vector that has set insertion semantics.
Definition SetVector.h:57
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
This is an optimization pass for GlobalISel generic memory operations.