151#include "llvm/IR/IntrinsicsNVPTX.h"
161#define DEBUG_TYPE "nvptx-lower-args"
174 return "Lower pointer arguments of CUDA kernels";
182char NVPTXLowerArgsLegacyPass::ID = 1;
185 "Lower arguments (NVPTX)",
false,
false)
215 const auto CloneInstInParamAS = [](
const IP &
I) ->
Value * {
218 LI->setOperand(0,
I.NewParam);
224 GEP->getSourceElementType(),
I.NewParam, Indices,
GEP->getName(),
226 NewGEP->setNoWrapFlags(
GEP->getNoWrapFlags());
233 BC->getName(), BC->getIterator());
242 if (
MI->getRawSource() ==
I.OldUse->get()) {
247 CallInst *
B = Builder.CreateMemTransferInst(
248 ID,
MI->getRawDest(),
MI->getDestAlign(),
I.NewParam,
249 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
250 for (
unsigned I : {0, 1})
251 if (
uint64_t Bytes =
MI->getParamDereferenceableBytes(
I))
252 B->addDereferenceableParamAttr(
I, Bytes);
260 auto ItemsToConvert =
264 while (!ItemsToConvert.empty()) {
265 IP
I = ItemsToConvert.pop_back_val();
266 Value *NewInst = CloneInstInParamAS(
I);
269 if (NewInst && NewInst != OldInst) {
274 ItemsToConvert.push_back({&U, NewInst});
276 InstructionsToDelete.push_back(OldInst);
288 I->eraseFromParent();
299 if (CurrentAlign >= OptimizedAlign)
303 <<
" instead of " << CurrentAlign.
value() <<
" for " << *Arg
309 return OptimizedAlign;
331 std::queue<LoadContext> Worklist;
332 Worklist.push({Val, 0});
334 while (!Worklist.empty()) {
335 LoadContext Ctx = Worklist.front();
338 for (
User *CurUser : Ctx.InitialVal->
users()) {
344 APInt OffsetAccumulated =
347 if (!
I->accumulateConstantOffset(
DL, OffsetAccumulated))
352 assert(
Offset != OffsetLimit &&
"Expect Offset less than UINT64_MAX");
354 Worklist.push({
I, Ctx.Offset +
Offset});
359 for (Load &CurLoad : Loads) {
361 Align CurLoadAlign = CurLoad.Inst->getAlign();
362 CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign));
371 Intrinsic::nvvm_internal_addrspace_wrap,
380 Arg.
addAttr(Attribute::ReadOnly);
387 using Base = PtrUseVisitor<ArgUseChecker>;
389 SmallPtrSet<Instruction *, 4> Conditionals;
391 ArgUseChecker(
const DataLayout &
DL) : PtrUseVisitor(
DL) {}
393 PtrInfo visitArgPtr(Argument &
A) {
394 assert(
A.getType()->isPointerTy());
396 IsOffsetKnown =
false;
407 while (!(Worklist.empty() || PI.isAborted())) {
408 UseToVisit ToVisit = Worklist.pop_back_val();
409 U = ToVisit.UseAndIsOffsetKnown.getPointer();
415 LLVM_DEBUG(
dbgs() <<
"Argument pointer escaped: " << *PI.getEscapingInst()
417 else if (PI.isAborted())
418 LLVM_DEBUG(
dbgs() <<
"Pointer use needs a copy: " << *PI.getAbortingInst()
421 <<
" conditionals\n");
425 void visitStoreInst(StoreInst &SI) {
427 if (
U->get() ==
SI.getValueOperand())
428 return PI.setEscapedAndAborted(&SI);
433 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
436 return PI.setEscapedAndAborted(&ASC);
442 void visitPHINodeOrSelectInst(Instruction &
I) {
445 Conditionals.insert(&
I);
448 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
449 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
453 void visitMemTransferInst(MemTransferInst &
II) {
454 if (*U ==
II.getRawDest())
458 void visitMemSetInst(MemSetInst &
II) { PI.setAborted(&
II); }
480 IRB.CreateMemCpy(AllocA, AllocA->
getAlign(), ArgInParamAS, AllocA->
getAlign(),
492 if (
II->getIntrinsicID() == Intrinsic::nvvm_internal_addrspace_wrap)
499 const bool HasCvtaParam) {
512 ArgUseChecker AUC(
DL);
513 ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(*Arg);
514 const bool ArgUseIsReadOnly = !(PI.isEscaped() || PI.isAborted());
515 if (ArgUseIsReadOnly && AUC.Conditionals.
empty()) {
520 for (
Use *U : UsesToUpdate)
529 LLVM_DEBUG(
dbgs() <<
"Using non-copy pointer to " << *Arg <<
"\n");
550 copyByValParam(
F, *Arg);
558 const bool HasCvtaParam = ST->hasCvtaParam();
560 LLVM_DEBUG(
dbgs() <<
"Lowering kernel args of " <<
F.getName() <<
"\n");
573 LLVM_DEBUG(
dbgs() <<
"Lowering function args of " <<
F.getName() <<
"\n");
593bool NVPTXLowerArgsLegacyPass::runOnFunction(
Function &
F) {
594 auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
599 return new NVPTXLowerArgsLegacyPass();
603 LLVM_DEBUG(
dbgs() <<
"Creating a copy of byval args of " <<
F.getName()
609 copyByValParam(
F, Arg);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
NVPTX address space definition.
static bool runOnDeviceFunction(Function &F)
nvptx lower Lower static false void convertToParamAS(ArrayRef< Use * > OldUses, Value *Param)
Recursively convert the users of a param to the param address space.
static CallInst * createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB, Argument &Arg)
static void lowerKernelByValParam(Argument *Arg, Function &F, const bool HasCvtaParam)
static bool copyFunctionByValArgs(Function &F)
static bool argIsProcessed(Argument *Arg)
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
static Align setByValParamAlign(Argument *Arg)
static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F)
static void propagateAlignmentToLoads(Value *Val, Align NewAlign, const DataLayout &DL)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
unsigned getDestAddressSpace() const
Returns the address space of the result.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
LLVM_ABI void addAttr(Attribute::AttrKind Kind)
LLVM_ABI bool hasByValAttr() const
Return true if this argument has the byval attribute.
LLVM_ABI void removeAttr(Attribute::AttrKind Kind)
Remove attributes from an argument.
const Function * getParent() const
LLVM_ABI Type * getParamByValType() const
If this is a byval argument, return its type.
LLVM_ABI MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
A parsed version of the target data layout string in and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void visit(Iterator Start, Iterator End)
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
An instruction for reading from memory.
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A base class for visitors over the uses of a pointer value.
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC)
void visitPtrToIntInst(PtrToIntInst &I)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ ADDRESS_SPACE_ENTRY_PARAM
constexpr StringLiteral GridConstant("nvvm.grid_constant")
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
FunctionPass * createNVPTXLowerArgsPass()
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
bool isParamGridConstant(const Argument &Arg)
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL)
Since function arguments are passed via .param space, we may want to increase their alignment in a wa...
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)