//===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
// Arguments to kernel and device functions are passed via param space,
// which imposes certain restrictions:
// http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces
//
// Kernel parameters are read-only and accessible only via ld.param
// instruction, directly or via a pointer.
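//
// For example, a direct read of a kernel parameter lowers to PTX along these
// lines (an illustrative sketch; the parameter name is made up):
//
//   ld.param.u32  %r1, [foo_param_0];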
//
// Device function parameters are directly accessible via
// ld.param/st.param, but taking the address of one returns a pointer
// to a copy created in local space which *can't* be used with
// ld.param/st.param.
//
// Copying a byval struct into local memory in IR allows us to enforce
// the param space restrictions, gives the rest of IR a pointer w/o
// param space restrictions, and gives us an opportunity to eliminate
// the copy.
//
// Pointer arguments to kernel functions need more work to be lowered:
//
// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the
//    global address space. This allows later optimizations to emit
//    ld.global.*/st.global.* for accessing these pointer arguments. For
//    example,
//
//    define void @foo(float* %input) {
//      %v = load float, float* %input, align 4
//      ...
//    }
//
//    becomes
//
//    define void @foo(float* %input) {
//      %input2 = addrspacecast float* %input to float addrspace(1)*
//      %input3 = addrspacecast float addrspace(1)* %input2 to float*
//      %v = load float, float* %input3, align 4
//      ...
//    }
//
//    Later, NVPTXInferAddressSpaces will optimize it to
//
//    define void @foo(float* %input) {
//      %input2 = addrspacecast float* %input to float addrspace(1)*
//      %v = load float, float addrspace(1)* %input2, align 4
//      ...
//    }
//
// 2. Convert byval kernel parameters to pointers in the param address space
//    (so that NVPTX emits ld/st.param). Convert pointers *within* a byval
//    kernel parameter to pointers in the global address space. This allows
//    NVPTX to emit ld/st.global.
//
//    struct S {
//      int *x;
//      int *y;
//    };
//    __global__ void foo(S s) {
//      int *b = s.y;
//      // use b
//    }
//
//    "b" points to the global address space. At the IR level,
//
//    define void @foo(ptr byval %input) {
//      %b_ptr = getelementptr {ptr, ptr}, ptr %input, i64 0, i32 1
//      %b = load ptr, ptr %b_ptr
//      ; use %b
//    }
//
//    becomes
//
//    define void @foo(ptr byval %input) {
//      %b_param = addrspacecast ptr %input to ptr addrspace(101)
//      %b_ptr = getelementptr {ptr, ptr}, ptr addrspace(101) %b_param, i64 0, i32 1
//      %b = load ptr, ptr addrspace(101) %b_ptr
//      %b_global = addrspacecast ptr %b to ptr addrspace(1)
//      ; use %b_global
//    }
//
// Create a local copy of kernel byval parameters used in a way that *might*
// mutate the parameter, by storing it in an alloca. Mutations to
// "grid_constant" parameters are undefined behaviour, and don't require local
// copies.
//
// define void @foo(ptr byval(%struct.s) align 4 %input) {
//   store i32 42, ptr %input
//   ret void
// }
//
// becomes
//
// define void @foo(ptr byval(%struct.s) align 4 %input) #1 {
//   %input1 = alloca %struct.s, align 4
//   %input2 = addrspacecast ptr %input to ptr addrspace(101)
//   %input3 = load %struct.s, ptr addrspace(101) %input2, align 4
//   store %struct.s %input3, ptr %input1, align 4
//   store i32 42, ptr %input1, align 4
//   ret void
// }
//
// If %input were passed to a device function, or written to memory,
// conservatively assume that %input gets mutated, and create a local copy.
//
// Convert param pointers to grid_constant byval kernel parameters that are
// passed into calls (device functions, intrinsics, inline asm), or otherwise
// "escape" (into stores/ptrtoints) to the generic address space, using the
// `nvvm.ptr.param.to.gen` intrinsic, so that NVPTX emits cvta.param
// (available on sm_70+).
//
// define void @foo(ptr byval(%struct.s) %input) {
//   ; %input is a grid_constant
//   %call = call i32 @escape(ptr %input)
//   ret void
// }
//
// becomes
//
// define void @foo(ptr byval(%struct.s) %input) {
//   %input1 = addrspacecast ptr %input to ptr addrspace(101)
//   ; The following intrinsic converts the pointer to generic. We don't use an
//   ; addrspacecast, to prevent a generic -> param -> generic chain from being
//   ; cancelled out.
//   %input1.gen = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) %input1)
//   %call = call i32 @escape(ptr %input1.gen)
//   ret void
// }
//
// TODO: merge this pass with NVPTXInferAddressSpaces so that other passes
// don't cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <numeric>
#include <queue>

#define DEBUG_TYPE "nvptx-lower-args"

using namespace llvm;

namespace llvm {
void initializeNVPTXLowerArgsPass(PassRegistry &);
}

namespace {
class NVPTXLowerArgs : public FunctionPass {
  bool runOnFunction(Function &F) override;

  bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F);
  bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F);

  // handle byval parameters
  void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg);
  // Knowing Ptr must point to the global address space, this function
  // addrspacecasts Ptr to global and then back to generic. This allows
  // NVPTXInferAddressSpaces to fold the global-to-generic cast into
  // loads/stores that appear later.
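  // A sketch of the rewrite (names are illustrative):
  //   %ptr.global  = addrspacecast ptr %ptr to ptr addrspace(1)
  //   %ptr.generic = addrspacecast ptr addrspace(1) %ptr.global to ptr
  // with every use of %ptr other than %ptr.global itself redirected to
  // %ptr.generic.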
  void markPointerAsGlobal(Value *Ptr);

public:
  static char ID; // Pass identification, replacement for typeid
  NVPTXLowerArgs() : FunctionPass(ID) {}
  StringRef getPassName() const override {
    return "Lower pointer arguments of CUDA kernels";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
  }
};
} // namespace

char NVPTXLowerArgs::ID = 1;

INITIALIZE_PASS_BEGIN(NVPTXLowerArgs, "nvptx-lower-args",
                      "Lower arguments (NVPTX)", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(NVPTXLowerArgs, "nvptx-lower-args",
                    "Lower arguments (NVPTX)", false, false)

// =============================================================================
// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
// and we can't guarantee that the only accesses are loads,
// then add the following instructions to the first basic block:
//
// %temp = alloca %struct.x, align 8
// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
// %tv = load %struct.x addrspace(101)* %tempd
// store %struct.x %tv, %struct.x* %temp, align 8
//
// The above code allocates some space on the stack and copies the incoming
// struct from param space to local space.
// Then replace all occurrences of %d by %temp.
//
// In case we know that all users are GEPs or Loads, replace them with the same
// ones in parameter AS, so we can access them using ld.param.
// =============================================================================

// For Loads, replaces the \p OldUse of the pointer with a Use of the same
// pointer in parameter AS.
// For "escapes" (to memory, a function call, or a ptrtoint), cast the OldUse to
// generic using cvta.param.
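//
// For example (an illustrative sketch, not taken from a specific test), given
// a byval argument %arg whose only uses are a GEP and a load:
//
//   %gep = getelementptr %struct.S, ptr %arg, i64 0, i32 1
//   %v   = load i32, ptr %gep
//
// the chain is recreated in the param address space, so the load can be
// lowered to ld.param:
//
//   %arg.param = addrspacecast ptr %arg to ptr addrspace(101)
//   %gep.param = getelementptr %struct.S, ptr addrspace(101) %arg.param,
//                i64 0, i32 1
//   %v         = load i32, ptr addrspace(101) %gep.param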
static void convertToParamAS(Use *OldUse, Value *Param, bool HasCvtaParam,
                             bool IsGridConstant) {
  Instruction *I = dyn_cast<Instruction>(OldUse->getUser());
  assert(I && "OldUse must be in an instruction");
  struct IP {
    Use *OldUse;
    Instruction *OldInstruction;
    Value *NewParam;
  };
  SmallVector<IP> ItemsToConvert = {{OldUse, I, Param}};
  SmallVector<Instruction *> InstructionsToDelete;

  auto CloneInstInParamAS = [HasCvtaParam,
                             IsGridConstant](const IP &I) -> Value * {
    if (auto *LI = dyn_cast<LoadInst>(I.OldInstruction)) {
      LI->setOperand(0, I.NewParam);
      return LI;
    }
    if (auto *GEP = dyn_cast<GetElementPtrInst>(I.OldInstruction)) {
      SmallVector<Value *, 4> Indices(GEP->indices());
      auto *NewGEP = GetElementPtrInst::Create(
          GEP->getSourceElementType(), I.NewParam, Indices, GEP->getName(),
          GEP->getIterator());
      NewGEP->setIsInBounds(GEP->isInBounds());
      return NewGEP;
    }
    if (auto *BC = dyn_cast<BitCastInst>(I.OldInstruction)) {
      auto *NewBCType = PointerType::get(BC->getContext(), ADDRESS_SPACE_PARAM);
      return BitCastInst::Create(BC->getOpcode(), I.NewParam, NewBCType,
                                 BC->getName(), BC->getIterator());
    }
    if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I.OldInstruction)) {
      assert(ASC->getDestAddressSpace() == ADDRESS_SPACE_PARAM);
      (void)ASC;
      // Just pass through the argument, the old ASC is no longer needed.
      return I.NewParam;
    }
    if (auto *MI = dyn_cast<MemTransferInst>(I.OldInstruction)) {
      if (MI->getRawSource() == I.OldUse->get()) {
        // Convert to a memcpy/memmove reading from param space.
        IRBuilder<> Builder(I.OldInstruction);
        Intrinsic::ID ID = MI->getIntrinsicID();

        CallInst *B = Builder.CreateMemTransferInst(
            ID, MI->getRawDest(), MI->getDestAlign(), I.NewParam,
            MI->getSourceAlign(), MI->getLength(), MI->isVolatile());
        for (unsigned I : {0, 1})
          if (uint64_t Bytes = MI->getParamDereferenceableBytes(I))
            B->addDereferenceableParamAttr(I, Bytes);
        return B;
      }
      // We may be able to handle other cases if the argument is
      // __grid_constant__
    }

    if (HasCvtaParam) {
      auto GetParamAddrCastToGeneric =
          [](Value *Addr, Instruction *OriginalUser) -> Value * {
        PointerType *ReturnTy =
            PointerType::get(OriginalUser->getContext(), ADDRESS_SPACE_GENERIC);
        Function *CvtToGen = Intrinsic::getOrInsertDeclaration(
            OriginalUser->getModule(), Intrinsic::nvvm_ptr_param_to_gen,
            {ReturnTy, PointerType::get(OriginalUser->getContext(),
                                        ADDRESS_SPACE_PARAM)});

        // Cast param address to generic address space
        Value *CvtToGenCall =
            CallInst::Create(CvtToGen, Addr, Addr->getName() + ".gen",
                             OriginalUser->getIterator());
        return CvtToGenCall;
      };
      auto *ParamInGenericAS =
          GetParamAddrCastToGeneric(I.NewParam, I.OldInstruction);

      // phi/select could use generic arg pointers w/o __grid_constant__
      if (auto *PHI = dyn_cast<PHINode>(I.OldInstruction)) {
        for (auto [Idx, V] : enumerate(PHI->incoming_values())) {
          if (V.get() == I.OldUse->get())
            PHI->setIncomingValue(Idx, ParamInGenericAS);
        }
      }
      if (auto *SI = dyn_cast<SelectInst>(I.OldInstruction)) {
        if (SI->getTrueValue() == I.OldUse->get())
          SI->setTrueValue(ParamInGenericAS);
        if (SI->getFalseValue() == I.OldUse->get())
          SI->setFalseValue(ParamInGenericAS);
      }

      // Escapes or writes can only use generic param pointers if
      // __grid_constant__ is in effect.
      if (IsGridConstant) {
        if (auto *CI = dyn_cast<CallInst>(I.OldInstruction)) {
          I.OldUse->set(ParamInGenericAS);
          return CI;
        }
        if (auto *SI = dyn_cast<StoreInst>(I.OldInstruction)) {
          // byval address is being stored, cast it to generic
          if (SI->getValueOperand() == I.OldUse->get())
            SI->setOperand(0, ParamInGenericAS);
          return SI;
        }
        if (auto *PI = dyn_cast<PtrToIntInst>(I.OldInstruction)) {
          if (PI->getPointerOperand() == I.OldUse->get())
            PI->setOperand(0, ParamInGenericAS);
          return PI;
        }
        // TODO: If we allow stores, we should allow memcpy/memset to
        // parameter, too.
      }
    }

    llvm_unreachable("Unsupported instruction");
  };

  while (!ItemsToConvert.empty()) {
    IP I = ItemsToConvert.pop_back_val();
    Value *NewInst = CloneInstInParamAS(I);

    if (NewInst && NewInst != I.OldInstruction) {
      // We've created a new instruction. Queue users of the old instruction to
      // be converted and the instruction itself to be deleted. We can't delete
      // the old instruction yet, because it's still in use by a load somewhere.
      for (Use &U : I.OldInstruction->uses())
        ItemsToConvert.push_back({&U, cast<Instruction>(U.getUser()), NewInst});

      InstructionsToDelete.push_back(I.OldInstruction);
    }
  }

  // Now we know that all argument loads are using addresses in parameter space
  // and we can finally remove the old instructions in generic AS. Instructions
  // scheduled for removal should be processed in reverse order so the ones
  // closest to the load are deleted first. Otherwise they may still be in use.
  // E.g. if we have Value = Load(BitCast(GEP(arg))), InstructionsToDelete will
  // have {GEP, BitCast}. GEP can't be deleted first, because it's still used
  // by the BitCast.
  for (Instruction *I : llvm::reverse(InstructionsToDelete))
    I->eraseFromParent();
}

// Adjust alignment of arguments passed byval in .param address space. We can
// increase alignment of such arguments in a way that ensures that we can
// effectively vectorize their loads. We also traverse all loads from the
// byval pointer and adjust their alignment, if they use a known offset.
// Such alignment changes must be kept in sync with the parameter stores and
// loads in NVPTXTargetLowering::LowerCall.
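//
// For example (an illustrative sketch): if the argument's alignment is raised
// to 16, a load at a known constant offset of 8 within it can be assumed
// aligned to gcd(16, 8) = 8, while a load at offset 4 only gets
// gcd(16, 4) = 4.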
static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
                                    const NVPTXTargetLowering *TLI) {
  Function *Func = Arg->getParent();
  Type *StructType = Arg->getParamByValType();
  const DataLayout &DL = Func->getDataLayout();

  uint64_t NewArgAlign =
      TLI->getFunctionParamOptimizedAlign(Func, StructType, DL).value();
  uint64_t CurArgAlign =
      Arg->getAttribute(Attribute::Alignment).getValueAsInt();

  if (CurArgAlign >= NewArgAlign)
    return;

  LLVM_DEBUG(dbgs() << "Try to use alignment " << NewArgAlign << " instead of "
                    << CurArgAlign << " for " << *Arg << '\n');

  auto NewAlignAttr =
      Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign);
  Arg->removeAttr(Attribute::Alignment);
  Arg->addAttr(NewAlignAttr);

  struct Load {
    LoadInst *Inst;
    uint64_t Offset;
  };

  struct LoadContext {
    Value *InitialVal;
    uint64_t Offset;
  };

  SmallVector<Load> Loads;
  std::queue<LoadContext> Worklist;
  Worklist.push({ArgInParamAS, 0});
  bool IsGridConstant = isParamGridConstant(*Arg);

  while (!Worklist.empty()) {
    LoadContext Ctx = Worklist.front();
    Worklist.pop();

    for (User *CurUser : Ctx.InitialVal->users()) {
      if (auto *I = dyn_cast<LoadInst>(CurUser)) {
        Loads.push_back({I, Ctx.Offset});
        continue;
      }

      if (auto *I = dyn_cast<BitCastInst>(CurUser)) {
        Worklist.push({I, Ctx.Offset});
        continue;
      }

      if (auto *I = dyn_cast<GetElementPtrInst>(CurUser)) {
        APInt OffsetAccumulated =
            APInt::getZero(DL.getIndexSizeInBits(ADDRESS_SPACE_PARAM));

        if (!I->accumulateConstantOffset(DL, OffsetAccumulated))
          continue;

        uint64_t OffsetLimit = -1;
        uint64_t Offset = OffsetAccumulated.getLimitedValue(OffsetLimit);
        assert(Offset != OffsetLimit && "Expect Offset less than UINT64_MAX");

        Worklist.push({I, Ctx.Offset + Offset});
        continue;
      }

      if (isa<MemTransferInst>(CurUser))
        continue;

      // supported for grid_constant
      if (IsGridConstant &&
          (isa<CallInst>(CurUser) || isa<StoreInst>(CurUser) ||
           isa<PtrToIntInst>(CurUser)))
        continue;

      llvm_unreachable("All users must be one of: load, "
                       "bitcast, getelementptr, call, store, ptrtoint");
    }
  }

  for (Load &CurLoad : Loads) {
    Align NewLoadAlign(std::gcd(NewArgAlign, CurLoad.Offset));
    Align CurLoadAlign(CurLoad.Inst->getAlign());
    CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign));
  }
}

namespace {
struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
  using Base = PtrUseVisitor<ArgUseChecker>;

  bool IsGridConstant;
  // Set of phi/select instructions using the Arg
  SmallPtrSet<Instruction *, 4> Conditionals;

  ArgUseChecker(const DataLayout &DL, bool IsGridConstant)
      : PtrUseVisitor(DL), IsGridConstant(IsGridConstant) {}

  PtrInfo visitArgPtr(Argument &A) {
    assert(A.getType()->isPointerTy());
    IntegerType *IntIdxTy = cast<IntegerType>(DL.getIndexType(A.getType()));
    IsOffsetKnown = false;
    Offset = APInt(IntIdxTy->getBitWidth(), 0);
    PI.reset();
    Conditionals.clear();

    LLVM_DEBUG(dbgs() << "Checking Argument " << A << "\n");
    // Enqueue the uses of this pointer.
    enqueueUsers(A);

    // Visit all the uses off the worklist until it is empty.
    // Note that unlike PtrUseVisitor we intentionally do not track offsets.
    // We're only interested in how we use the pointer.
    while (!(Worklist.empty() || PI.isAborted())) {
      UseToVisit ToVisit = Worklist.pop_back_val();
      U = ToVisit.UseAndIsOffsetKnown.getPointer();
      Instruction *I = cast<Instruction>(U->getUser());
      if (isa<PHINode>(I) || isa<SelectInst>(I))
        Conditionals.insert(I);
      LLVM_DEBUG(dbgs() << "Processing " << *I << "\n");
      Base::visit(I);
    }
    if (PI.isEscaped())
      LLVM_DEBUG(dbgs() << "Argument pointer escaped: " << *PI.getEscapingInst()
                        << "\n");
    else if (PI.isAborted())
      LLVM_DEBUG(dbgs() << "Pointer use needs a copy: " << *PI.getAbortingInst()
                        << "\n");
    LLVM_DEBUG(dbgs() << "Traversed " << Conditionals.size()
                      << " conditionals\n");
    return PI;
  }

  void visitStoreInst(StoreInst &SI) {
    // Storing the pointer escapes it.
    if (U->get() == SI.getValueOperand())
      return PI.setEscapedAndAborted(&SI);
    // Writes to the pointer are UB w/ __grid_constant__, but do not force a
    // copy.
    if (!IsGridConstant)
      return PI.setAborted(&SI);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    // ASC to param space are no-ops and do not need a copy
    if (ASC.getDestAddressSpace() != ADDRESS_SPACE_PARAM)
      return PI.setEscapedAndAborted(&ASC);
  }

  void visitPtrToIntInst(PtrToIntInst &I) {
    if (IsGridConstant)
      return;
    PI.setEscapedAndAborted(&I);
  }
  void visitPHINodeOrSelectInst(Instruction &I) {
    assert(isa<PHINode>(I) || isa<SelectInst>(I));
  }
  // PHI and select just pass through the pointers.
  void visitPHINode(PHINode &PN) { enqueueUsers(PN); }
  void visitSelectInst(SelectInst &SI) { enqueueUsers(SI); }

  void visitMemTransferInst(MemTransferInst &II) {
    if (*U == II.getRawDest() && !IsGridConstant)
      PI.setAborted(&II);
    // memcpy/memmove are OK when the pointer is source. We can convert them to
    // AS-specific memcpy.
  }

  void visitMemSetInst(MemSetInst &II) {
    if (!IsGridConstant)
      PI.setAborted(&II);
  }
}; // struct ArgUseChecker

void copyByValParam(Function &F, Argument &Arg) {
  LLVM_DEBUG(dbgs() << "Creating a local copy of " << Arg << "\n");
  // Create a temporary copy of the parameter in the entry block.
  BasicBlock::iterator FirstInst = F.getEntryBlock().begin();
  Type *StructType = Arg.getParamByValType();
  const DataLayout &DL = F.getDataLayout();
  AllocaInst *AllocA = new AllocaInst(StructType, DL.getAllocaAddrSpace(),
                                      Arg.getName(), FirstInst);
  // Set the alignment to the alignment of the byval parameter. This is
  // because later loads/stores assume that alignment, and we are going to
  // replace all uses of the byval parameter with this alloca instruction.
  AllocA->setAlignment(F.getParamAlign(Arg.getArgNo())
                           .value_or(DL.getPrefTypeAlign(StructType)));
  Arg.replaceAllUsesWith(AllocA);

  Value *ArgInParam = new AddrSpaceCastInst(
      &Arg, PointerType::get(Arg.getContext(), ADDRESS_SPACE_PARAM),
      Arg.getName(), FirstInst);
  // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
  // addrspacecast preserves alignment. Since params are constant, this load
  // is definitely not volatile.
  const auto ArgSize = *AllocA->getAllocationSize(DL);
  IRBuilder<> IRB(&*FirstInst);
  IRB.CreateMemCpy(AllocA, AllocA->getAlign(), ArgInParam, AllocA->getAlign(),
                   ArgSize);
}
} // namespace

void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
                                      Argument *Arg) {
  Function *Func = Arg->getParent();
  bool HasCvtaParam =
      TM.getSubtargetImpl(*Func)->hasCvtaParam() && isKernelFunction(*Func);
  bool IsGridConstant = HasCvtaParam && isParamGridConstant(*Arg);
  const DataLayout &DL = Func->getDataLayout();
  BasicBlock::iterator FirstInst = Func->getEntryBlock().begin();
  Type *StructType = Arg->getParamByValType();
  assert(StructType && "Missing byval type");

  ArgUseChecker AUC(DL, IsGridConstant);
  ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(*Arg);
  bool ArgUseIsReadOnly = !(PI.isEscaped() || PI.isAborted());
  // Easy case: accessing the parameter directly is fine.
  if (ArgUseIsReadOnly && AUC.Conditionals.empty()) {
    // Convert all loads and intermediate operations to use parameter AS and
    // skip creation of a local copy of the argument.
    SmallVector<Use *, 16> UsesToUpdate;
    for (Use &U : Arg->uses())
      UsesToUpdate.push_back(&U);

    Value *ArgInParamAS = new AddrSpaceCastInst(
        Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
        FirstInst);
    for (Use *U : UsesToUpdate)
      convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant);
    LLVM_DEBUG(dbgs() << "No need to copy or cast " << *Arg << "\n");

    const auto *TLI =
        cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());

    adjustByValArgAlignment(Arg, ArgInParamAS, TLI);

    return;
  }

  // We can't access the byval arg directly; we need a pointer to it. On sm_70+
  // we can take a pointer to the argument without making a local copy.
  // However, we're still not allowed to write to it. If the user specified
  // `__grid_constant__` for the argument, we'll consider an escaped pointer as
  // read-only.
  if (HasCvtaParam && (ArgUseIsReadOnly || IsGridConstant)) {
    LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n");
    // Replace all argument pointer uses (which might include a device function
    // call) with a cast to the generic address space using cvta.param
    // instruction, which avoids a local copy.
    IRBuilder<> IRB(&Func->getEntryBlock().front());

    // Cast argument to param address space
    auto *CastToParam = cast<AddrSpaceCastInst>(IRB.CreateAddrSpaceCast(
        Arg, IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getName() + ".param"));

    // Cast param address to generic address space. We do not use an
    // addrspacecast to generic here, because LLVM considers `Arg` to be in the
    // generic address space, and a `generic -> param` cast followed by a
    // `param -> generic` cast will be folded away. The `param -> generic`
    // intrinsic will be correctly lowered to `cvta.param`.
    Value *CvtToGenCall = IRB.CreateIntrinsic(
        IRB.getPtrTy(ADDRESS_SPACE_GENERIC), Intrinsic::nvvm_ptr_param_to_gen,
        CastToParam, nullptr, CastToParam->getName() + ".gen");

    Arg->replaceAllUsesWith(CvtToGenCall);

    // Do not replace Arg in the cast to param space
    CastToParam->setOperand(0, Arg);
  } else
    copyByValParam(*Func, *Arg);
}

void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
  if (Ptr->getType()->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC)
    return;

  // Decide where to emit the addrspacecast pair.
  BasicBlock::iterator InsertPt;
  if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
    // Insert at the function entry if Ptr is an argument.
    InsertPt = Arg->getParent()->getEntryBlock().begin();
  } else {
    // Insert right after Ptr if Ptr is an instruction.
    InsertPt = ++cast<Instruction>(Ptr)->getIterator();
    assert(InsertPt != InsertPt->getParent()->end() &&
           "We don't call this function with Ptr being a terminator.");
  }

  Instruction *PtrInGlobal = new AddrSpaceCastInst(
      Ptr, PointerType::get(Ptr->getContext(), ADDRESS_SPACE_GLOBAL),
      Ptr->getName(), InsertPt);
  Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
                                              Ptr->getName(), InsertPt);
  // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal.
  Ptr->replaceAllUsesWith(PtrInGeneric);
  PtrInGlobal->setOperand(0, Ptr);
}

// =============================================================================
// Main function for this pass.
// =============================================================================
bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
                                         Function &F) {
  // Copying of byval aggregates + SROA may result in pointers being loaded as
  // integers, followed by inttoptr. We may want to mark those as global, too,
  // but only if the loaded integer is used exclusively for conversion to a
  // pointer with inttoptr.
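  //
  // For example (an illustrative sketch):
  //   %i = load i64, ptr %gep        ; pointer loaded as an integer
  //   %p = inttoptr i64 %i to ptr    ; sole use converts it back to a pointer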
  auto HandleIntToPtr = [this](Value &V) {
    if (llvm::all_of(V.users(), [](User *U) { return isa<IntToPtrInst>(U); })) {
      SmallVector<User *, 16> UsersToUpdate(V.users());
      for (User *U : UsersToUpdate)
        markPointerAsGlobal(U);
    }
  };
  if (TM.getDrvInterface() == NVPTX::CUDA) {
    // Mark pointers in byval structs as global.
    for (auto &B : F) {
      for (auto &I : B) {
        if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
          if (LI->getType()->isPointerTy() || LI->getType()->isIntegerTy()) {
            Value *UO = getUnderlyingObject(LI->getPointerOperand());
            if (Argument *Arg = dyn_cast<Argument>(UO)) {
              if (Arg->hasByValAttr()) {
                // LI is a load from a pointer within a byval kernel parameter.
                if (LI->getType()->isPointerTy())
                  markPointerAsGlobal(LI);
                else
                  HandleIntToPtr(*LI);
              }
            }
          }
        }
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
  for (Argument &Arg : F.args()) {
    if (Arg.getType()->isPointerTy()) {
      if (Arg.hasByValAttr())
        handleByValParam(TM, &Arg);
      else if (TM.getDrvInterface() == NVPTX::CUDA)
        markPointerAsGlobal(&Arg);
    } else if (Arg.getType()->isIntegerTy() &&
               TM.getDrvInterface() == NVPTX::CUDA) {
      HandleIntToPtr(Arg);
    }
  }
  return true;
}

// Device functions only need to copy byval args into local memory.
bool NVPTXLowerArgs::runOnDeviceFunction(const NVPTXTargetMachine &TM,
                                         Function &F) {
  LLVM_DEBUG(dbgs() << "Lowering function args of " << F.getName() << "\n");
  for (Argument &Arg : F.args())
    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
      handleByValParam(TM, &Arg);
  return true;
}

bool NVPTXLowerArgs::runOnFunction(Function &F) {
  auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();

  return isKernelFunction(F) ? runOnKernelFunction(TM, F)
                             : runOnDeviceFunction(TM, F);
}

FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }

static bool copyFunctionByValArgs(Function &F) {
  LLVM_DEBUG(dbgs() << "Creating a copy of byval args of " << F.getName()
                    << "\n");
  bool Changed = false;
  for (Argument &Arg : F.args())
    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr() &&
        !(isParamGridConstant(Arg) && isKernelFunction(F))) {
      copyByValParam(F, Arg);
      Changed = true;
    }
  return Changed;
}

PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
  return copyFunctionByValArgs(F) ? PreservedAnalyses::none()
                                  : PreservedAnalyses::all();
}