doxygen/AMDGPULowerKernelArguments_8cpp_source.html

//===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file This pass replaces accesses to kernel arguments with loads from

/// offsets from the kernarg base pointer.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/IR/Attributes.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

#include "llvm/IR/MDBuilder.h"

#include "llvm/Target/TargetMachine.h"


#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"


using namespace llvm;


namespace {


class PreloadKernelArgInfo {

private:

  Function &F;

  const GCNSubtarget &ST;

  unsigned NumFreeUserSGPRs;


  enum HiddenArg : unsigned {

    HIDDEN_BLOCK_COUNT_X,

    HIDDEN_BLOCK_COUNT_Y,

    HIDDEN_BLOCK_COUNT_Z,

    HIDDEN_GROUP_SIZE_X,

    HIDDEN_GROUP_SIZE_Y,

    HIDDEN_GROUP_SIZE_Z,

    HIDDEN_REMAINDER_X,

    HIDDEN_REMAINDER_Y,

    HIDDEN_REMAINDER_Z,

    END_HIDDEN_ARGS

  };


  // Stores information about a specific hidden argument.

  struct HiddenArgInfo {

    // Offset in bytes from the location in the kernearg segment pointed to by

    // the implicitarg pointer.

    uint8_t Offset;

    // The size of the hidden argument in bytes.

    uint8_t Size;

    // The name of the hidden argument in the kernel signature.

    const char *Name;

  };


  static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {

      {0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},

      {8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},

      {14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},

      {18, 2, "_hidden_remainder_x"},  {20, 2, "_hidden_remainder_y"},

      {22, 2, "_hidden_remainder_z"}};


  static HiddenArg getHiddenArgFromOffset(unsigned Offset) {

    for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)

      if (HiddenArgs[I].Offset == Offset)

        return static_cast<HiddenArg>(I);


    return END_HIDDEN_ARGS;

  }


  static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {

    if (HA < END_HIDDEN_ARGS)

      return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);


    llvm_unreachable("Unexpected hidden argument.");

  }


  static const char *getHiddenArgName(HiddenArg HA) {

    if (HA < END_HIDDEN_ARGS) {

      return HiddenArgs[HA].Name;

    }

    llvm_unreachable("Unexpected hidden argument.");

  }


  // Clones the function after adding implicit arguments to the argument list

  // and returns the new updated function. Preloaded implicit arguments are

  // added up to and including the last one that will be preloaded, indicated by

  // LastPreloadIndex. Currently preloading is only performed on the totality of

  // sequential data from the kernarg segment including implicit (hidden)

  // arguments. This means that all arguments up to the last preloaded argument

  // will also be preloaded even if that data is unused.

  Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) {

    FunctionType *FT = F.getFunctionType();

    LLVMContext &Ctx = F.getParent()->getContext();

    SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());

    for (unsigned I = 0; I <= LastPreloadIndex; ++I)

      FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I)));


    FunctionType *NFT =

        FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());

    Function *NF =

        Function::Create(NFT, F.getLinkage(), F.getAddressSpace(), F.getName());


    NF->copyAttributesFrom(&F);

    NF->copyMetadata(&F, 0);

    NF->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);


    F.getParent()->getFunctionList().insert(F.getIterator(), NF);

    NF->takeName(&F);

    NF->splice(NF->begin(), &F);


    Function::arg_iterator NFArg = NF->arg_begin();

    for (Argument &Arg : F.args()) {

      Arg.replaceAllUsesWith(&*NFArg);

      NFArg->takeName(&Arg);

      ++NFArg;

    }


    AttrBuilder AB(Ctx);

    AB.addAttribute(Attribute::InReg);

    AB.addAttribute("amdgpu-hidden-argument");

    AttributeList AL = NF->getAttributes();

    for (unsigned I = 0; I <= LastPreloadIndex; ++I) {

      AL = AL.addParamAttributes(Ctx, NFArg->getArgNo(), AB);

      NFArg++->setName(getHiddenArgName(HiddenArg(I)));

    }


    NF->setAttributes(AL);

    F.replaceAllUsesWith(NF);

    F.setCallingConv(CallingConv::C);


    return NF;

  }


public:

  PreloadKernelArgInfo(Function &F, const GCNSubtarget &ST) : F(F), ST(ST) {

    setInitialFreeUserSGPRsCount();

  }


  // Returns the maximum number of user SGPRs that we have available to preload

  // arguments.

  void setInitialFreeUserSGPRsCount() {

    GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);

    NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();

  }


  bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,

                            uint64_t LastExplicitArgOffset) {

    //  Check if this argument may be loaded into the same register as the

    //  previous argument.

    if (ArgOffset - LastExplicitArgOffset < 4 &&

        !isAligned(Align(4), ArgOffset))

      return true;


    // Pad SGPRs for kernarg alignment.

    ArgOffset = alignDown(ArgOffset, 4);

    unsigned Padding = ArgOffset - LastExplicitArgOffset;

    unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;

    unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;

    if (NumPreloadSGPRs + PaddingSGPRs > NumFreeUserSGPRs)

      return false;


    NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);

    return true;

  }


  // Try to allocate SGPRs to preload implicit kernel arguments.

  void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,

                                       uint64_t LastExplicitArgOffset,

                                       IRBuilder<> &Builder) {

    Function *ImplicitArgPtr = Intrinsic::getDeclarationIfExists(

        F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);

    if (!ImplicitArgPtr)

      return;


    const DataLayout &DL = F.getParent()->getDataLayout();

    // Pair is the load and the load offset.

    SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;

    for (auto *U : ImplicitArgPtr->users()) {

      Instruction *CI = dyn_cast<Instruction>(U);

      if (!CI || CI->getParent()->getParent() != &F)

        continue;


      for (auto *U : CI->users()) {

        int64_t Offset = 0;

        auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr?

        if (!Load) {

          if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)

            continue;


          Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?

        }


        if (!Load || !Load->isSimple())

          continue;


        // FIXME: Expand to handle 64-bit implicit args and large merged loads.

        LLVMContext &Ctx = F.getParent()->getContext();

        Type *LoadTy = Load->getType();

        HiddenArg HA = getHiddenArgFromOffset(Offset);

        if (HA == END_HIDDEN_ARGS || LoadTy != getHiddenArgType(Ctx, HA))

          continue;


        ImplicitArgLoads.push_back(std::make_pair(Load, Offset));

      }

    }


    if (ImplicitArgLoads.empty())

      return;


    // Allocate loads in order of offset. We need to be sure that the implicit

    // argument can actually be preloaded.

    std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());


    // If we fail to preload any implicit argument we know we don't have SGPRs

    // to preload any subsequent ones with larger offsets. Find the first

    // argument that we cannot preload.

    auto *PreloadEnd = std::find_if(

        ImplicitArgLoads.begin(), ImplicitArgLoads.end(),

        [&](const std::pair<LoadInst *, unsigned> &Load) {

          unsigned LoadSize = DL.getTypeStoreSize(Load.first->getType());

          unsigned LoadOffset = Load.second;

          if (!tryAllocPreloadSGPRs(LoadSize,

                                    LoadOffset + ImplicitArgsBaseOffset,

                                    LastExplicitArgOffset))

            return true;


          LastExplicitArgOffset =

              ImplicitArgsBaseOffset + LoadOffset + LoadSize;

          return false;

        });


    if (PreloadEnd == ImplicitArgLoads.begin())

      return;


    unsigned LastHiddenArgIndex = getHiddenArgFromOffset(PreloadEnd[-1].second);

    Function *NF = cloneFunctionWithPreloadImplicitArgs(LastHiddenArgIndex);

    assert(NF);

    for (const auto *I = ImplicitArgLoads.begin(); I != PreloadEnd; ++I) {

      LoadInst *LoadInst = I->first;

      unsigned LoadOffset = I->second;

      unsigned HiddenArgIndex = getHiddenArgFromOffset(LoadOffset);

      unsigned Index = NF->arg_size() - LastHiddenArgIndex + HiddenArgIndex - 1;

      Argument *Arg = NF->getArg(Index);

      LoadInst->replaceAllUsesWith(Arg);

    }

  }

};


/// Legacy pass-manager function pass that forwards each function to
/// lowerKernelArguments using the target machine from TargetPassConfig.
class AMDGPULowerKernelArguments : public FunctionPass {
public:
  static char ID;

  AMDGPULowerKernelArguments() : FunctionPass(ID) {}

  // Declares the TargetPassConfig requirement and marks every analysis as
  // preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<TargetPassConfig>();
  }

  bool runOnFunction(Function &F) override;
};


} // end anonymous namespace


// Returns the position in BB at which new instructions should be inserted:
// the first insertion point, advanced past any leading static allocas.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator Pos = BB.getFirstInsertionPt();
  const BasicBlock::iterator BlockEnd = BB.end();
  while (Pos != BlockEnd) {
    const auto *Alloca = dyn_cast<AllocaInst>(&*Pos);

    // Stop at the first non-alloca. Also stop at a dynamic alloca: its value
    // may depend on the loaded kernargs, so loads must be inserted before it.
    if (!(Alloca && Alloca->isStaticAlloca()))
      return Pos;

    ++Pos;
  }

  return Pos;
}


// Rewrites uses of the arguments of an AMDGPU_KERNEL function into explicit
// loads from the kernarg segment pointer, or, for byref arguments, into
// pointers into that segment. Arguments that are successfully assigned to
// preload SGPRs are left untouched.
//
// Returns false, without emitting anything, if F is not an AMDGPU_KERNEL
// function, has no arguments, or has a zero-sized kernarg segment. Returns
// true otherwise.
static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  // All new instructions go into the entry block, after its static allocas.
  IRBuilder<> Builder(&EntryBlock, getInsertPt(EntryBlock));

  const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();

  Align MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  // Base pointer for every argument load; annotated as nonnull and
  // dereferenceable for the whole segment size.
  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");
  KernArgSegment->addRetAttr(Attribute::NonNull);
  KernArgSegment->addRetAttr(
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  // Running end offset of the explicit arguments processed so far (excluding
  // BaseOffset).
  uint64_t ExplicitArgOffset = 0;
  // Preloaded kernel arguments must be sequential.
  bool InPreloadSequence = true;
  PreloadKernelArgInfo PreloadInfo(F, ST);

  for (Argument &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
    Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

    uint64_t Size = DL.getTypeSizeInBits(ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);

    // EltOffset is this argument's byte offset within the kernarg segment.
    uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
    uint64_t LastExplicitArgOffset = ExplicitArgOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;

    // Guard against the situation where hidden arguments have already been
    // lowered and added to the kernel function signature, i.e. in a situation
    // where this pass has run twice.
    if (Arg.hasAttribute("amdgpu-hidden-argument"))
      break;

    // Try to preload this argument into user SGPRs.
    if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
        !Arg.getType()->isAggregateType())
      if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
                                           LastExplicitArgOffset))
        continue;

    // The first argument that is not preloaded ends the preload sequence.
    InPreloadSequence = false;

    if (Arg.use_empty())
      continue;

    // If this is byref, the loads are already explicit in the function. We just
    // need to rewrite the pointer values.
    if (IsByRef) {
      Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".byval.kernarg.offset");

      Value *CastOffsetPtr =
          Builder.CreateAddrSpaceCast(ArgOffsetPtr, Arg.getType());
      Arg.replaceAllUsesWith(CastOffsetPtr);
      continue;
    }

    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          !ST.hasUsableDSOffset())
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    auto *VT = dyn_cast<FixedVectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;
    // Sub-dword non-aggregate arguments are loaded as a full i32 and
    // extracted with a shift and truncate.
    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    Align AdjustedAlign = commonAlignment(
        KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);

    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the relevant
      // bits.
      // TODO: Update this for GFX12 which does have scalar sub-dword loads.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }

    if (IsV3 && Size >= 32) {
      V4Ty = FixedVectorType::get(VT->getElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);

    // Carry argument attributes over to the load as the matching metadata.
    if (Arg.hasAttribute(Attribute::NoUndef))
      Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));

    if (Arg.hasAttribute(Attribute::Range)) {
      const ConstantRange &Range =
          Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
      Load->setMetadata(LLVMContext::MD_range,
                        MDB.createRange(Range.getLower(), Range.getUpper()));
    }

    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable,
          MDNode::get(Ctx,
                      MDB.createConstant(
                        ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable_or_null,
          MDNode::get(Ctx,
                      MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                          DerefOrNullBytes))));
      }

      // Note: this ParamAlign shadows the one declared at the top of the loop
      // body.
      if (MaybeAlign ParamAlign = Arg.getParamAlign()) {
        Load->setMetadata(
            LLVMContext::MD_align,
            MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                                 Builder.getInt64Ty(), ParamAlign->value()))));
      }
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      // Shift the argument's bytes down to bit 0 of the loaded dword, then
      // truncate to the argument's width and bitcast to its type.
      Value *ExtractBits = OffsetDiff == 0 ?
        Load : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
                                            Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      // Keep the first three lanes of the widened four-element load.
      Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }

  KernArgSegment->addRetAttr(
      Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  // If the preload sequence was never broken, try to continue it with the
  // hidden (implicit) arguments.
  if (InPreloadSequence) {
    uint64_t ImplicitArgsBaseOffset =
        alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
        BaseOffset;
    PreloadInfo.tryAllocImplicitArgPreloadSGPRs(ImplicitArgsBaseOffset,
                                                ExplicitArgOffset, Builder);
  }

  return true;
}


// Legacy pass entry point: obtains the TargetMachine from the required
// TargetPassConfig analysis and runs the shared lowering routine on F.
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  const TargetMachine &Machine =
      getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  return lowerKernelArguments(F, Machine);
}


// Legacy pass registration: the pass is registered under the DEBUG_TYPE
// argument string with the description "AMDGPU Lower Kernel Arguments". The
// two trailing flags are both false.
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE,
                      "AMDGPU Lower Kernel Arguments", false, false)
INITIALIZE_PASS_END(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments",
                    false, false)

// Definition of the static pass identifier declared in the class.
char AMDGPULowerKernelArguments::ID = 0;


// Factory for the legacy pass; returns a newly allocated pass instance.
FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
  FunctionPass *Pass = new AMDGPULowerKernelArguments();
  return Pass;
}


// New pass-manager entry point. Reports everything as preserved when the
// lowering made no change; otherwise only the CFG analyses are preserved.
PreservedAnalyses
AMDGPULowerKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) {
  if (!lowerKernelArguments(F, TM))
    return PreservedAnalyses::all();

  // TODO: Preserves a lot more.
  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}

Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:504

getInsertPt
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
Definition: AMDGPULowerKernelArguments.cpp:271

lowerKernelArguments
static bool lowerKernelArguments(Function &F, const TargetMachine &TM)
Definition: AMDGPULowerKernelArguments.cpp:285

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerKernelArguments.cpp:24

AMDGPU.h

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

IRBuilder.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MDBuilder.h

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

ValueTracking.h

FunctionType
Definition: ItaniumDemangle.h:823

PointerType
Definition: ItaniumDemangle.h:627

VectorType
Definition: ItaniumDemangle.h:1173

llvm::AMDGPULowerKernelArgumentsPass::run
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
Definition: AMDGPULowerKernelArguments.cpp:514

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:63

llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition: Instructions.cpp:1234

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::Argument::getArgNo
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:49

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::AttrBuilder
Definition: Attributes.h:1064

llvm::AttributeList
Definition: Attributes.h:490

llvm::Attribute::getWithDereferenceableBytes
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
Definition: Attributes.cpp:244

llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:461

llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72

llvm::CallBase::addRetAttr
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition: InstrTypes.h:1484

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1479

llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47

llvm::ConstantRange::getLower
const APInt & getLower() const
Return the lower value for this range.
Definition: ConstantRange.h:203

llvm::ConstantRange::getUpper
const APInt & getUpper() const
Return the upper value for this range.
Definition: ConstantRange.h:206

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63

llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.

llvm::Function
Definition: Function.h:63

llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173

llvm::Function::splice
void splice(Function::iterator ToIt, Function *FromF)
Transfer all blocks from FromF to this function at ToIt.
Definition: Function.h:761

llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353

llvm::Function::begin
iterator begin()
Definition: Function.h:853

llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:868

llvm::Function::setAttributes
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
Definition: Function.h:356

llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:901

llvm::Function::setIsNewDbgInfoFormat
void setIsNewDbgInfoFormat(bool NewVal)
Definition: Function.cpp:105

llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:886

llvm::Function::copyAttributesFrom
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:860

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::GCNUserSGPRUsageInfo
Definition: GCNSubtarget.h:1660

llvm::GCNUserSGPRUsageInfo::getNumFreeUserSGPRs
unsigned getNumFreeUserSGPRs()
Definition: GCNSubtarget.cpp:674

llvm::GlobalObject::copyMetadata
void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
Definition: Metadata.cpp:1799

llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:558

llvm::IRBuilderBase::CreateAlignedLoad
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815

llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545

llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550

llvm::IRBuilderBase::CreateIntrinsic
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900

llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152

llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533

llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019

llvm::IRBuilderBase::CreateConstInBoundsGEP1_64
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition: IRBuilder.h:1944

llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535

llvm::IRBuilderBase::CreateAddrSpaceCast
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705

llvm::Instruction
Definition: Instruction.h:68

llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:42

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176

llvm::MDBuilder
Definition: MDBuilder.h:36

llvm::MDBuilder::createConstant
ConstantAsMetadata * createConstant(Constant *C)
Return the given constant as metadata.
Definition: MDBuilder.cpp:24

llvm::MDBuilder::createRange
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:95

llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:81

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition: SmallVector.h:269

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition: SmallVector.h:267

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)

llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:303

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377

llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534

llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309

llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition: ilist_node.h:32

uint64_t

uint8_t

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

TargetMachine.h

false
Definition: StackSlotColoring.cpp:193

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::Intrinsic::getDeclarationIfExists
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:746

llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:66

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145

llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:639

llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:557

llvm::HexPrintStyle::Lower
@ Lower

llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:509

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155

llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117

llvm::less_second
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1476