Go to the documentation of this file.
103 #define DEBUG_TYPE "nvptx-lower-args"
105 using namespace llvm;
124 void markPointerAsGlobal(
Value *Ptr);
131 return "Lower pointer arguments of CUDA kernels";
142 "Lower arguments (NVPTX)",
false,
false)
166 assert(
I &&
"OldUser must be an instruction");
175 if (
auto *LI = dyn_cast<LoadInst>(
I.OldInstruction)) {
176 LI->setOperand(0,
I.NewParam);
179 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
I.OldInstruction)) {
184 NewGEP->setIsInBounds(
GEP->isInBounds());
187 if (
auto *BC = dyn_cast<BitCastInst>(
I.OldInstruction)) {
193 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(
I.OldInstruction)) {
206 if (NewInst && NewInst !=
I.OldInstruction) {
210 for (
Value *V :
I.OldInstruction->users())
225 I->eraseFromParent();
243 Arg->getAttribute(Attribute::Alignment).getValueAsInt();
245 if (CurArgAlign >= NewArgAlign)
248 LLVM_DEBUG(
dbgs() <<
"Try to use alignment " << NewArgAlign <<
" instead of "
249 << CurArgAlign <<
" for " << *
Arg <<
'\n');
252 Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign);
253 Arg->removeAttr(Attribute::Alignment);
254 Arg->addAttr(NewAlignAttr);
267 std::queue<LoadContext> Worklist;
268 Worklist.push({ArgInParamAS, 0});
270 while (!Worklist.empty()) {
271 LoadContext Ctx = Worklist.front();
274 for (
User *CurUser : Ctx.InitialVal->users()) {
275 if (
auto *
I = dyn_cast<LoadInst>(CurUser)) {
276 Loads.push_back({
I, Ctx.Offset});
280 if (
auto *
I = dyn_cast<BitCastInst>(CurUser)) {
281 Worklist.push({
I, Ctx.Offset});
285 if (
auto *
I = dyn_cast<GetElementPtrInst>(CurUser)) {
286 APInt OffsetAccumulated =
289 if (!
I->accumulateConstantOffset(
DL, OffsetAccumulated))
294 assert(Offset != OffsetLimit &&
"Expect Offset less than UINT64_MAX");
296 Worklist.push({
I, Ctx.Offset + Offset});
301 "bitcast, getelementptr.");
305 for (
Load &CurLoad : Loads) {
307 Align CurLoadAlign(CurLoad.Inst->getAlign());
308 CurLoad.Inst->setAlignment(
std::max(NewLoadAlign, CurLoadAlign));
312 void NVPTXLowerArgs::handleByValParam(
Argument *
Arg) {
318 auto IsALoadChain = [&](
Value *Start) {
320 auto IsALoadChainInstr = [](
Value *V) ->
bool {
321 if (isa<GetElementPtrInst>(V) || isa<BitCastInst>(V) || isa<LoadInst>(V))
324 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
331 while (!ValuesToCheck.empty()) {
333 if (!IsALoadChainInstr(V)) {
339 if (!isa<LoadInst>(V))
352 for (
Value *V : UsersToUpdate)
353 convertToParamAS(V, ArgInParamAS);
361 cast<NVPTXTargetLowering>(
TM->getSubtargetImpl()->getTargetLowering());
370 unsigned AS =
DL.getAllocaAddrSpace();
377 Arg->replaceAllUsesWith(AllocA);
387 false, AllocA->
getAlign(), FirstInst);
391 void NVPTXLowerArgs::markPointerAsGlobal(
Value *Ptr) {
399 InsertPt =
Arg->getParent()->getEntryBlock().begin();
402 InsertPt = ++cast<Instruction>(Ptr)->getIterator();
403 assert(InsertPt != InsertPt->getParent()->end() &&
404 "We don't call this function with Ptr being a terminator.");
422 bool NVPTXLowerArgs::runOnKernelFunction(
Function &
F) {
427 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I)) {
428 if (LI->getType()->isPointerTy()) {
431 if (
Arg->hasByValAttr()) {
433 markPointerAsGlobal(LI);
442 LLVM_DEBUG(
dbgs() <<
"Lowering kernel args of " <<
F.getName() <<
"\n");
444 if (
Arg.getType()->isPointerTy()) {
445 if (
Arg.hasByValAttr())
446 handleByValParam(&
Arg);
448 markPointerAsGlobal(&
Arg);
455 bool NVPTXLowerArgs::runOnDeviceFunction(
Function &
F) {
456 LLVM_DEBUG(
dbgs() <<
"Lowering function args of " <<
F.getName() <<
"\n");
458 if (
Arg.getType()->isPointerTy() &&
Arg.hasByValAttr())
459 handleByValParam(&
Arg);
469 return new NVPTXLowerArgs(
TM);
This class represents an incoming formal argument to a Function.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL) const
getFunctionParamOptimizedAlign - since function arguments are passed via .param space,...
This is an optimization pass for GlobalISel generic memory operations.
A parsed version of the target data layout string, together with methods for querying it.
InstListType::iterator iterator
Instruction iterators...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
T greatestCommonDivisor(T A, T B)
Return the greatest common divisor of the values using Euclid's algorithm.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
SmallVector< Instruction * > InstructionsToDelete
The instances of the Type class are immutable: once they are created, they are never changed.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
FunctionPass * createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM)
LLVM_NODISCARD T pop_back_val()
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
assert(I &&"OldUser must be an instruction")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
This class represents a conversion between pointers from one address space to another.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Instruction * OldInstruction
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool isKernelFunction(const Function &F)
An instruction for storing to memory.
Class for arbitrary precision integers.
void setOperand(unsigned i, Value *Val)
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
INITIALIZE_PASS(NVPTXLowerArgs, "nvptx-lower-args", "Lower arguments (NVPTX)", false, false) static void convertToParamAS(Value *OldUser
Class to represent struct types.
StringRef - Represent a constant reference to a string, i.e.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
static bool runOnFunction(Function &F, bool PostInlining)
uint64_t value() const
This is a hole in the type system and should not be abused.
static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS, const NVPTXTargetLowering *TLI)
void setAlignment(Align Align)
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer if ...
void initializeNVPTXLowerArgsPass(PassRegistry &)
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
SmallVector< IP > ItemsToConvert
an instruction to allocate memory on the stack
LLVM Value Representation.
iterator_range< user_iterator > users()