57#define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
62 "amdgpu-any-address-space-out-arguments",
63 cl::desc(
"Replace pointer out arguments with "
64 "struct returns for non-private address space"),
69 "amdgpu-max-return-arg-num-regs",
70 cl::desc(
"Approximately limit number of return registers for replacing out arguments"),
75 "Number out arguments moved to struct return values");
77 "Number of functions with out arguments moved to struct return values");
106 "AMDGPU Rewrite Out Arguments",
false,
false)
111char AMDGPURewriteOutArguments::
ID = 0;
113Type *AMDGPURewriteOutArguments::getStoredType(
Value &Arg)
const {
114 const int MaxUses = 10;
118 for (
Use &U : Arg.uses())
121 Type *StoredType =
nullptr;
122 while (!Worklist.
empty()) {
125 if (
auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
126 for (
Use &U : BCI->uses())
131 if (
auto *SI = dyn_cast<StoreInst>(U->getUser())) {
132 if (UseCount++ > MaxUses)
135 if (!SI->isSimple() ||
139 if (StoredType && StoredType != SI->getValueOperand()->getType())
141 StoredType = SI->getValueOperand()->getType();
152Type *AMDGPURewriteOutArguments::getOutArgumentType(
Argument &Arg)
const {
157 if (!ArgTy || (ArgTy->getAddressSpace() !=
DL->getAllocaAddrSpace() &&
163 Type *StoredType = getStoredType(Arg);
164 if (!StoredType ||
DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
170bool AMDGPURewriteOutArguments::doInitialization(
Module &M) {
171 DL = &
M.getDataLayout();
175bool AMDGPURewriteOutArguments::runOnFunction(
Function &
F) {
180 if (
F.isVarArg() ||
F.hasStructRetAttr() ||
184 MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
186 unsigned ReturnNumRegs = 0;
190 if (!
RetTy->isVoidTy()) {
191 ReturnNumRegs =
DL->getTypeStoreSize(
RetTy) / 4;
201 if (
Type *Ty = getOutArgumentType(Arg)) {
203 <<
" in function " <<
F.getName() <<
'\n');
217 if (
ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
236 for (
const auto &Pair : OutArgs) {
237 bool ThisReplaceable =
true;
241 Type *ArgTy = Pair.second;
248 unsigned ArgNumRegs =
DL->getTypeStoreSize(ArgTy) / 4;
264 LLVM_DEBUG(
dbgs() <<
"Found out argument store: " << *SI <<
'\n');
267 ThisReplaceable =
false;
272 if (!ThisReplaceable)
275 for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
276 Value *ReplVal =
Store.second->getValueOperand();
278 auto &ValVec = Replacements[
Store.first];
280 [OutArg](
const std::pair<Argument *, Value *> &Entry) {
281 return Entry.first == OutArg;
284 <<
"Saw multiple out arg stores" << *OutArg <<
'\n');
287 ThisReplaceable =
false;
291 ValVec.emplace_back(OutArg, ReplVal);
292 Store.second->eraseFromParent();
295 if (ThisReplaceable) {
298 ++NumOutArgumentsReplaced;
304 if (Replacements.
empty())
311 F.getFunctionType()->params(),
314 LLVM_DEBUG(
dbgs() <<
"Computed new return type: " << *NewRetTy <<
'\n');
317 F.getName() +
".body");
318 F.getParent()->getFunctionList().insert(
F.getIterator(), NewFunc);
339 for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
349 NewRetVal =
B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
351 for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
352 NewRetVal =
B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
357 B.CreateRet(NewRetVal);
375 CallInst *StubCall =
B.CreateCall(NewFunc, StubCallArgs);
377 int RetIdx =
RetTy->isVoidTy() ? 0 : 1;
386 Value *Val =
B.CreateExtractValue(StubCall, RetIdx++);
387 B.CreateAlignedStore(Val, &Arg,
Align);
390 if (!
RetTy->isVoidTy()) {
391 B.CreateRet(
B.CreateExtractValue(StubCall, 0));
397 F.addFnAttr(Attribute::AlwaysInline);
399 ++NumOutArgumentFunctionsReplaced;
404 return new AMDGPURewriteOutArguments();
AMDGPU Rewrite Out Arguments
static cl::opt< unsigned > MaxNumRetRegs("amdgpu-max-return-arg-num-regs", cl::desc("Approximately limit number of return registers for replacing out arguments"), cl::Hidden, cl::init(16))
static cl::opt< bool > AnyAddressSpace("amdgpu-any-address-space-out-arguments", cl::desc("Replace pointer out arguments with " "struct returns for non-private address space"), cl::Hidden, cl::init(false))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
bool hasByValAttr() const
Return true if this argument has the byval attribute.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string, and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
void splice(Function::iterator ToIt, Function *FromF)
Transfer all blocks from FromF to this function at ToIt.
void stealArgumentListFrom(Function &Src)
Steal arguments from another function.
void removeRetAttrs(const AttributeMask &Attrs)
Removes the attributes from the return value list of attributes.
void setIsNewDbgInfoFormat(bool NewVal)
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
void setComdat(Comdat *C)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
This is an important class for using LLVM in a threaded context.
A memory dependence query can return one of three different answers.
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
Provides a lazy, caching interface for making common memory aliasing information queries,...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
Class to represent struct types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPURewriteOutArgumentsPass()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
This struct is a compact representation of a valid (non-zero power of two) alignment.