106#define DEBUG_TYPE "nvptx-lower-args"
127 void markPointerAsGlobal(
Value *
Ptr);
133 return "Lower pointer arguments of CUDA kernels";
141char NVPTXLowerArgs::ID = 1;
144 "Lower arguments (NVPTX)",
false,
false)
171 assert(
I &&
"OldUser must be an instruction");
179 auto CloneInstInParamAS = [](
const IP &
I) ->
Value * {
180 if (
auto *LI = dyn_cast<LoadInst>(
I.OldInstruction)) {
181 LI->setOperand(0,
I.NewParam);
184 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
I.OldInstruction)) {
189 NewGEP->setIsInBounds(
GEP->isInBounds());
192 if (
auto *BC = dyn_cast<BitCastInst>(
I.OldInstruction)) {
194 return BitCastInst::Create(BC->getOpcode(),
I.NewParam, NewBCType,
197 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(
I.OldInstruction)) {
206 while (!ItemsToConvert.
empty()) {
208 Value *NewInst = CloneInstInParamAS(
I);
210 if (NewInst && NewInst !=
I.OldInstruction) {
214 for (
Value *V :
I.OldInstruction->users())
215 ItemsToConvert.
push_back({cast<Instruction>(V), NewInst});
217 InstructionsToDelete.
push_back(
I.OldInstruction);
229 I->eraseFromParent();
249 if (CurArgAlign >= NewArgAlign)
252 LLVM_DEBUG(
dbgs() <<
"Try to use alignment " << NewArgAlign <<
" instead of "
253 << CurArgAlign <<
" for " << *Arg <<
'\n');
256 Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign);
271 std::queue<LoadContext> Worklist;
272 Worklist.push({ArgInParamAS, 0});
274 while (!Worklist.empty()) {
275 LoadContext Ctx = Worklist.
front();
278 for (
User *CurUser : Ctx.InitialVal->users()) {
279 if (
auto *
I = dyn_cast<LoadInst>(CurUser)) {
284 if (
auto *
I = dyn_cast<BitCastInst>(CurUser)) {
285 Worklist.push({
I, Ctx.Offset});
289 if (
auto *
I = dyn_cast<GetElementPtrInst>(CurUser)) {
290 APInt OffsetAccumulated =
293 if (!
I->accumulateConstantOffset(
DL, OffsetAccumulated))
298 assert(
Offset != OffsetLimit &&
"Expect Offset less than UINT64_MAX");
300 Worklist.push({
I, Ctx.Offset +
Offset});
305 "bitcast, getelementptr.");
309 for (Load &CurLoad : Loads) {
310 Align NewLoadAlign(std::gcd(NewArgAlign, CurLoad.Offset));
311 Align CurLoadAlign(CurLoad.Inst->getAlign());
312 CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign));
323 auto IsALoadChain = [&](
Value *Start) {
325 auto IsALoadChainInstr = [](
Value *
V) ->
bool {
326 if (isa<GetElementPtrInst>(V) || isa<BitCastInst>(V) || isa<LoadInst>(V))
329 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
336 while (!ValuesToCheck.
empty()) {
338 if (!IsALoadChainInstr(V)) {
339 LLVM_DEBUG(
dbgs() <<
"Need a copy of " << *Arg <<
" because of " << *V
344 if (!isa<LoadInst>(V))
357 for (
Value *V : UsersToUpdate)
362 cast<NVPTXTargetLowering>(
TM.getSubtargetImpl()->getTargetLowering());
371 unsigned AS =
DL.getAllocaAddrSpace();
388 false, AllocA->
getAlign(), FirstInst);
392void NVPTXLowerArgs::markPointerAsGlobal(
Value *
Ptr) {
403 InsertPt = ++cast<Instruction>(
Ptr)->getIterator();
404 assert(InsertPt != InsertPt->getParent()->end() &&
405 "We don't call this function with Ptr being a terminator.");
410 Ptr->getName(), &*InsertPt);
412 Ptr->getName(), &*InsertPt);
414 Ptr->replaceAllUsesWith(PtrInGeneric);
427 auto HandleIntToPtr = [
this](
Value &
V) {
430 for (
User *U : UsersToUpdate)
431 markPointerAsGlobal(U);
438 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I)) {
439 if (LI->getType()->isPointerTy() || LI->getType()->isIntegerTy()) {
441 if (
Argument *Arg = dyn_cast<Argument>(UO)) {
444 if (LI->getType()->isPointerTy())
445 markPointerAsGlobal(LI);
456 LLVM_DEBUG(
dbgs() <<
"Lowering kernel args of " <<
F.getName() <<
"\n");
460 handleByValParam(
TM, &Arg);
462 markPointerAsGlobal(&Arg);
474 LLVM_DEBUG(
dbgs() <<
"Lowering function args of " <<
F.getName() <<
"\n");
477 handleByValParam(
TM, &Arg);
481bool NVPTXLowerArgs::runOnFunction(
Function &
F) {
485 : runOnDeviceFunction(
TM,
F);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
nvptx lower Lower arguments(NVPTX)"
static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS, const NVPTXTargetLowering *TLI)
nvptx lower Lower static false void convertToParamAS(Value *OldUser, Value *Param)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
Attribute getAttribute(Attribute::AttrKind Kind) const
void addAttr(Attribute::AttrKind Kind)
bool hasByValAttr() const
Return true if this argument has the byval attribute.
void removeAttr(Attribute::AttrKind Kind)
Remove attributes from an argument.
const Function * getParent() const
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Type * getParamByValType() const
If this is a byval argument, return its type.
uint64_t getValueAsInt() const
Return the attribute's value as an integer.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
A parsed version of the target data layout string, and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
An instruction for reading from memory.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL) const
getFunctionParamOptimizedAlign - since function arguments are passed via .param space,...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Class to represent struct types.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isIntegerTy() const
True if this is an instance of IntegerType.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
void initializeNVPTXLowerArgsPass(PassRegistry &)
FunctionPass * createNVPTXLowerArgsPass()
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isKernelFunction(const Function &F)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.