106#define DEBUG_TYPE "nvptx-lower-args"
127 void markPointerAsGlobal(
Value *
Ptr);
133 return "Lower pointer arguments of CUDA kernels";
141char NVPTXLowerArgs::ID = 1;
144 "Lower arguments (NVPTX)",
false,
false)
171 assert(
I &&
"OldUser must be an instruction");
179 auto CloneInstInParamAS = [](
const IP &
I) ->
Value * {
180 if (
auto *LI = dyn_cast<LoadInst>(
I.OldInstruction)) {
181 LI->setOperand(0,
I.NewParam);
184 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
I.OldInstruction)) {
189 NewGEP->setIsInBounds(
GEP->isInBounds());
192 if (
auto *BC = dyn_cast<BitCastInst>(
I.OldInstruction)) {
193 auto *NewBCType = PointerType::getWithSamePointeeType(
195 return BitCastInst::Create(BC->getOpcode(),
I.NewParam, NewBCType,
198 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(
I.OldInstruction)) {
207 while (!ItemsToConvert.
empty()) {
209 Value *NewInst = CloneInstInParamAS(
I);
211 if (NewInst && NewInst !=
I.OldInstruction) {
215 for (
Value *V :
I.OldInstruction->users())
216 ItemsToConvert.
push_back({cast<Instruction>(V), NewInst});
218 InstructionsToDelete.
push_back(
I.OldInstruction);
230 I->eraseFromParent();
248 Arg->getAttribute(Attribute::Alignment).getValueAsInt();
250 if (CurArgAlign >= NewArgAlign)
253 LLVM_DEBUG(
dbgs() <<
"Try to use alignment " << NewArgAlign <<
" instead of "
254 << CurArgAlign <<
" for " << *
Arg <<
'\n');
257 Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign);
258 Arg->removeAttr(Attribute::Alignment);
259 Arg->addAttr(NewAlignAttr);
272 std::queue<LoadContext> Worklist;
273 Worklist.push({ArgInParamAS, 0});
275 while (!Worklist.empty()) {
276 LoadContext Ctx = Worklist.
front();
279 for (
User *CurUser : Ctx.InitialVal->users()) {
280 if (
auto *
I = dyn_cast<LoadInst>(CurUser)) {
285 if (
auto *
I = dyn_cast<BitCastInst>(CurUser)) {
286 Worklist.push({
I, Ctx.Offset});
290 if (
auto *
I = dyn_cast<GetElementPtrInst>(CurUser)) {
291 APInt OffsetAccumulated =
294 if (!
I->accumulateConstantOffset(
DL, OffsetAccumulated))
299 assert(
Offset != OffsetLimit &&
"Expect Offset less than UINT64_MAX");
301 Worklist.push({
I, Ctx.Offset +
Offset});
306 "bitcast, getelementptr.");
310 for (Load &CurLoad : Loads) {
311 Align NewLoadAlign(std::gcd(NewArgAlign, CurLoad.Offset));
312 Align CurLoadAlign(CurLoad.Inst->getAlign());
313 CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign));
324 auto IsALoadChain = [&](
Value *Start) {
326 auto IsALoadChainInstr = [](
Value *
V) ->
bool {
327 if (isa<GetElementPtrInst>(V) || isa<BitCastInst>(V) || isa<LoadInst>(V))
330 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
337 while (!ValuesToCheck.
empty()) {
339 if (!IsALoadChainInstr(V)) {
345 if (!isa<LoadInst>(V))
358 for (
Value *V : UsersToUpdate)
363 cast<NVPTXTargetLowering>(
TM.getSubtargetImpl()->getTargetLowering());
372 unsigned AS =
DL.getAllocaAddrSpace();
379 Arg->replaceAllUsesWith(AllocA);
389 false, AllocA->
getAlign(), FirstInst);
393void NVPTXLowerArgs::markPointerAsGlobal(
Value *
Ptr) {
401 InsertPt =
Arg->getParent()->getEntryBlock().begin();
404 InsertPt = ++cast<Instruction>(
Ptr)->getIterator();
405 assert(InsertPt != InsertPt->getParent()->end() &&
406 "We don't call this function with Ptr being a terminator.");
411 PointerType::getWithSamePointeeType(cast<PointerType>(
Ptr->getType()),
413 Ptr->getName(), &*InsertPt);
415 Ptr->getName(), &*InsertPt);
417 Ptr->replaceAllUsesWith(PtrInGeneric);
430 auto HandleIntToPtr = [
this](
Value &
V) {
440 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I)) {
441 if (LI->getType()->isPointerTy() || LI->getType()->isIntegerTy()) {
444 if (
Arg->hasByValAttr()) {
446 if (LI->getType()->isPointerTy())
447 markPointerAsGlobal(LI);
458 LLVM_DEBUG(
dbgs() <<
"Lowering kernel args of " <<
F.getName() <<
"\n");
460 if (
Arg.getType()->isPointerTy()) {
461 if (
Arg.hasByValAttr())
462 handleByValParam(
TM, &
Arg);
464 markPointerAsGlobal(&
Arg);
465 }
else if (
Arg.getType()->isIntegerTy() &&
476 LLVM_DEBUG(
dbgs() <<
"Lowering function args of " <<
F.getName() <<
"\n");
478 if (
Arg.getType()->isPointerTy() &&
Arg.hasByValAttr())
479 handleByValParam(
TM, &
Arg);
483bool NVPTXLowerArgs::runOnFunction(
Function &
F) {
487 : runOnDeviceFunction(
TM,
F);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
nvptx lower Lower arguments(NVPTX)"
static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS, const NVPTXTargetLowering *TLI)
nvptx lower Lower static false void convertToParamAS(Value *OldUser, Value *Param)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
InstListType::iterator iterator
Instruction iterators...
A parsed version of the target data layout string, and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
An instruction for reading from memory.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL) const
getFunctionParamOptimizedAlign - since function arguments are passed via .param space,...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Class to represent struct types.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
void initializeNVPTXLowerArgsPass(PassRegistry &)
FunctionPass * createNVPTXLowerArgsPass()
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isKernelFunction(const Function &F)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.