Go to the documentation of this file.
57 #define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
62 "amdgpu-any-address-space-out-arguments",
63 cl::desc(
"Replace pointer out arguments with "
64 "struct returns for non-private address space"),
69 "amdgpu-max-return-arg-num-regs",
70 cl::desc(
"Approximately limit number of return registers for replacing out arguments"),
75 "Number out arguments moved to struct return values");
76 STATISTIC(NumOutArgumentFunctionsReplaced,
77 "Number of functions with out arguments moved to struct return values");
99 bool doInitialization(
Module &M)
override;
106 "AMDGPU Rewrite Out Arguments",
false,
false)
111 char AMDGPURewriteOutArguments::
ID = 0;
113 Type *AMDGPURewriteOutArguments::getStoredType(
Value &
Arg)
const {
114 const int MaxUses = 10;
119 Worklist.push_back(&U);
121 Type *StoredType =
nullptr;
122 while (!Worklist.empty()) {
125 if (
auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
127 Worklist.push_back(&U);
131 if (
auto *
SI = dyn_cast<StoreInst>(U->getUser())) {
132 if (UseCount++ > MaxUses)
135 if (!
SI->isSimple() ||
139 if (StoredType && StoredType !=
SI->getValueOperand()->getType())
141 StoredType =
SI->getValueOperand()->getType();
152 Type *AMDGPURewriteOutArguments::getOutArgumentType(
Argument &
Arg)
const {
159 Arg.hasByValAttr() ||
Arg.hasStructRetAttr()) {
163 Type *StoredType = getStoredType(
Arg);
164 if (!StoredType ||
DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
170 bool AMDGPURewriteOutArguments::doInitialization(
Module &M) {
171 DL = &
M.getDataLayout();
180 if (
F.isVarArg() ||
F.hasStructRetAttr() ||
184 MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
186 unsigned ReturnNumRegs = 0;
189 Type *RetTy =
F.getReturnType();
191 ReturnNumRegs =
DL->getTypeStoreSize(RetTy) / 4;
196 ReturnTypes.push_back(RetTy);
201 if (
Type *Ty = getOutArgumentType(
Arg)) {
203 <<
" in function " <<
F.getName() <<
'\n');
204 OutArgs.push_back({&
Arg, Ty});
218 Returns.push_back(RI);
236 for (
const auto &Pair : OutArgs) {
237 bool ThisReplaceable =
true;
241 Type *ArgTy = Pair.second;
248 unsigned ArgNumRegs =
DL->getTypeStoreSize(ArgTy) / 4;
267 ThisReplaceable =
false;
272 if (!ThisReplaceable)
275 for (std::pair<ReturnInst *, StoreInst *>
Store : ReplaceableStores) {
276 Value *ReplVal =
Store.second->getValueOperand();
278 auto &ValVec = Replacements[
Store.first];
280 [OutArg](
const std::pair<Argument *, Value *> &Entry) {
281 return Entry.first == OutArg;
284 <<
"Saw multiple out arg stores" << *OutArg <<
'\n');
287 ThisReplaceable =
false;
291 ValVec.emplace_back(OutArg, ReplVal);
292 Store.second->eraseFromParent();
295 if (ThisReplaceable) {
296 ReturnTypes.push_back(ArgTy);
298 ++NumOutArgumentsReplaced;
304 if (Replacements.
empty())
311 F.getFunctionType()->params(),
314 LLVM_DEBUG(
dbgs() <<
"Computed new return type: " << *NewRetTy <<
'\n');
317 F.getName() +
".body");
318 F.getParent()->getFunctionList().insert(
F.getIterator(), NewFunc);
337 for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
347 NewRetVal =
B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
349 for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
350 NewRetVal =
B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
355 B.CreateRet(NewRetVal);
362 if (OutArgIndexes.
count(
Arg.getArgNo())) {
367 StubCallArgs.push_back(&
Arg);
373 CallInst *StubCall =
B.CreateCall(NewFunc, StubCallArgs);
375 int RetIdx = RetTy->
isVoidTy() ? 0 : 1;
377 if (!OutArgIndexes.
count(
Arg.getArgNo()))
382 Type *EltTy = OutArgIndexes[
Arg.getArgNo()];
384 DL->getValueOrABITypeAlignment(
Arg.getParamAlign(), EltTy);
386 Value *Val =
B.CreateExtractValue(StubCall, RetIdx++);
390 Value *PtrVal =
B.CreateBitCast(&
Arg, PtrTy);
392 B.CreateAlignedStore(Val, PtrVal,
Align);
396 B.CreateRet(
B.CreateExtractValue(StubCall, 0));
402 F.addFnAttr(Attribute::AlwaysInline);
404 ++NumOutArgumentFunctionsReplaced;
409 return new AMDGPURewriteOutArguments();
This class represents an incoming formal argument to a Function.
void setComdat(Comdat *C)
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Return a value (possibly void), from a function.
A parsed version of the target data layout string, and methods for querying it.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A memory dependence query can return one of three different answers.
LLVM_NODISCARD T pop_back_val()
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void stealArgumentListFrom(Function &Src)
Steal arguments from another function.
Represent the analysis usage information of a pass.
void removeRetAttrs(const AttributeMask &Attrs)
Removes the attributes from the list of return value attributes.
iterator_range< use_iterator > uses()
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< unsigned > MaxNumRetRegs("amdgpu-max-return-arg-num-regs", cl::desc("Approximately limit number of return registers for replacing out arguments"), cl::Hidden, cl::init(16))
STATISTIC(NumFunctions, "Total number of functions")
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool isEntryFunctionCC(CallingConv::ID CC)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An instruction for storing to memory.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
This is an important class for using LLVM in a threaded context.
initializer< Ty > init(const Ty &Val)
Class to represent pointers.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
StandardInstrumentations SI(Debug, VerifyEach)
bool isVoidTy() const
Return true if this is 'void'.
A Module instance is used to store all the information related to an LLVM module.
INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE, "AMDGPU Rewrite Out Arguments", false, false) INITIALIZE_PASS_END(AMDGPURewriteOutArguments
void setOperand(unsigned i, Value *Val)
FunctionPass * createAMDGPURewriteOutArgumentsPass()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Class to represent struct types.
Type * getType() const
All values are typed, get the type of this value.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM_NODISCARD bool empty() const
static bool runOnFunction(Function &F, bool PostInlining)
Provides a lazy, caching interface for making common memory aliasing information queries,...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
@ PrivateLinkage
Like Internal, but omit from symbol table.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
static cl::opt< bool > AnyAddressSpace("amdgpu-any-address-space-out-arguments", cl::desc("Replace pointer out arguments with " "struct returns for non-private address space"), cl::Hidden, cl::init(false))
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
FunctionPass class - This class is used to implement most global optimizations.
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
static unsigned getPointerOperandIndex()
AMDGPU Rewrite Out Arguments
LLVM Value Representation.
Class to represent function types.
A Use represents the edge between a Value definition and its users.
reference emplace_back(ArgTypes &&... Args)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.