doxygen/AMDGPUPromoteKernelArguments_8cpp_source.html

//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file This pass recursively promotes generic pointer arguments of a kernel

/// into the global address space.

///

/// The pass walks the kernel's pointer arguments, then the loads from them. If

/// a loaded value is a pointer and the loaded pointer is unmodified in the

/// kernel before the load, then the loaded pointer is promoted to global. The

/// pass then continues recursively.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUMemoryUtils.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/MemorySSA.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/InitializePasses.h"


#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"


using namespace llvm;


namespace {


class AMDGPUPromoteKernelArguments : public FunctionPass {

  MemorySSA *MSSA;


  AliasAnalysis *AA;


  Instruction *ArgCastInsertPt;


  SmallVector<Value *> Ptrs;


  void enqueueUsers(Value *Ptr);


  bool promotePointer(Value *Ptr);


  bool promoteLoad(LoadInst *LI);


public:

  static char ID;


  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}


  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);


  bool runOnFunction(Function &F) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<AAResultsWrapperPass>();

    AU.addRequired<MemorySSAWrapperPass>();

    AU.setPreservesAll();

  }

};


} // end anonymous namespace


/// Collect the loads that read through \p Ptr and queue them on Ptrs.
///
/// Users of \p Ptr are visited transitively through GEP, addrspacecast and
/// bitcast instructions. A load is queued only when its pointer operand
/// resolves back to \p Ptr via stripInBoundsOffsets() and
/// AMDGPU::isClobberedInFunction() reports that it is not clobbered.
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> Worklist(Ptr->users());

  while (!Worklist.empty()) {
    // Only instruction users are of interest; anything else is dropped.
    Instruction *Inst = dyn_cast<Instruction>(Worklist.pop_back_val());
    if (!Inst)
      continue;

    const unsigned Opcode = Inst->getOpcode();

    if (Opcode == Instruction::Load) {
      LoadInst *Load = cast<LoadInst>(Inst);
      if (Load->getPointerOperand()->stripInBoundsOffsets() != Ptr)
        continue;

      // The clobber query is only made for loads that actually address Ptr.
      if (!AMDGPU::isClobberedInFunction(Load, MSSA, AA))
        Ptrs.push_back(Load);
      continue;
    }

    // Address computations and casts: keep following their users as long as
    // their first operand still resolves to Ptr.
    const bool IsPassThrough = Opcode == Instruction::GetElementPtr ||
                               Opcode == Instruction::AddrSpaceCast ||
                               Opcode == Instruction::BitCast;
    if (IsPassThrough && Inst->getOperand(0)->stripInBoundsOffsets() == Ptr)
      Worklist.append(Inst->user_begin(), Inst->user_end());
  }
}


/// Process a single worklist value.
///
/// If \p Ptr is a load it is first handed to promoteLoad(). If \p Ptr has
/// pointer type in the flat, global or constant address space, the loads
/// reading through it are queued. If it is a flat pointer, a cast to global
/// and back to flat is inserted and the other uses of \p Ptr are redirected
/// to the round-tripped value.
///
/// \returns true if the IR was changed.
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  const bool Changed = LI ? promoteLoad(LI) : false;

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  const unsigned AS = PT->getAddressSpace();
  const bool IsFlat = AS == AMDGPUAS::FLAT_ADDRESS;

  if (IsFlat || AS == AMDGPUAS::GLOBAL_ADDRESS ||
      AS == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  // Only flat pointers get the cast pair below.
  if (!IsFlat)
    return Changed;

  // For a load the casts go right after the load itself; for anything else
  // they go at the insertion point chosen in run().
  Instruction *InsertPt = ArgCastInsertPt;
  if (LI)
    InsertPt = &*std::next(LI->getIterator());
  IRBuilder<> B(InsertPt);

  // Cast pointer to global address space and back to flat and let
  // Infer Address Spaces pass to do all necessary rewriting.
  PointerType *GlobalPT =
      PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, GlobalPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));

  // The first cast must keep using the original pointer; every other use is
  // switched over to the flat value produced by the second cast.
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}


/// Attach empty "amdgpu.noclobber" metadata to \p LI.
///
/// Loads for which isSimple() is false are left untouched.
///
/// \returns true if the metadata was attached, false otherwise.
bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (LI->isSimple()) {
    MDNode *EmptyNode = MDNode::get(LI->getContext(), {});
    LI->setMetadata("amdgpu.noclobber", EmptyNode);
    return true;
  }

  return false;
}


/// Find the position in \p BB where new instructions should be inserted:
/// the first insertion point of the block, advanced past any leading run of
/// static allocas.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator It = BB.getFirstInsertionPt();
  const BasicBlock::iterator End = BB.end();

  while (It != End) {
    AllocaInst *Alloca = dyn_cast<AllocaInst>(&*It);

    // Stop at the first instruction that is not a static alloca. A dynamic
    // alloca may depend on the loaded kernargs, so loads will need to be
    // inserted before it.
    if (!(Alloca && Alloca->isStaticAlloca()))
      break;

    ++It;
  }

  return It;
}


/// Run the promotion on \p F using the supplied analyses.
///
/// Nothing is done when skipFunction() asks to skip \p F, when \p F does not
/// use the AMDGPU_KERNEL calling convention, or when it has no arguments.
/// Otherwise every used pointer argument in the flat, global or constant
/// address space seeds the worklist, which is then drained through
/// promotePointer().
///
/// \returns true if \p F was modified.
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  // Cache per-function state for the helper methods.
  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  // Seed the worklist with the eligible kernel arguments.
  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *ArgPT = dyn_cast<PointerType>(Arg.getType());
    if (!ArgPT)
      continue;

    const unsigned AS = ArgPT->getAddressSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS ||
        AS == AMDGPUAS::CONSTANT_ADDRESS)
      Ptrs.push_back(&Arg);
  }

  // promotePointer() may push further entries while the list is drained.
  bool Changed = false;
  while (!Ptrs.empty())
    Changed |= promotePointer(Ptrs.pop_back_val());

  return Changed;
}


/// Legacy pass manager entry point: fetch MemorySSA and alias analysis from
/// the wrapper passes declared in getAnalysisUsage() and delegate to run().
bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  auto &MemSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  auto &AliasInfo = getAnalysis<AAResultsWrapperPass>().getAAResults();

  return run(F, MemSSA, AliasInfo);
}


// Legacy pass manager registration. The macro arguments are, in order: the
// pass class, its command-line argument (DEBUG_TYPE), its display name, and
// the `cfg` and `analysis` flags (both false here). The two dependencies
// listed between BEGIN and END are the analyses the pass requires in
// getAnalysisUsage().
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,

                      "AMDGPU Promote Kernel Arguments", false, false)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)

INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,

                    "AMDGPU Promote Kernel Arguments", false, false)


// Definition of the static pass ID declared in the class; the constructor
// passes it to FunctionPass.
char AMDGPUPromoteKernelArguments::ID = 0;


/// Factory for the legacy pass manager: returns a newly allocated
/// AMDGPUPromoteKernelArguments pass.
FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  FunctionPass *Pass = new AMDGPUPromoteKernelArguments();
  return Pass;
}


/// New pass manager entry point.
///
/// Obtains MemorySSA and alias analysis for \p F from \p AM and runs the
/// shared implementation. When nothing changed, all analyses are reported as
/// preserved; otherwise only the CFG analyses and MemorySSA are.
PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);

  if (!AMDGPUPromoteKernelArguments().run(F, MSSA, AA))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

AMDGPUMemoryUtils.h

Arguments
AMDGPU Promote Kernel Arguments
Definition: AMDGPUPromoteKernelArguments.cpp:199

getInsertPt
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
Definition: AMDGPUPromoteKernelArguments.cpp:139

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPromoteKernelArguments.cpp:26

AMDGPU.h

AliasAnalysis.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

IRBuilder.h

InitializePasses.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

MemorySSA.h
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

SmallVector.h
This file defines the SmallVector class.

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:77

PointerType
Definition: ItaniumDemangle.h:627

llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:933

llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:981

llvm::AAResults
Definition: AliasAnalysis.h:314

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:63

llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition: Instructions.cpp:1234

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:464

llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:426

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

llvm::Function
Definition: Function.h:63

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705

llvm::Instruction
Definition: Instruction.h:68

llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679

llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176

llvm::LoadInst::isSimple
bool isSimple() const
Definition: Instructions.h:247

llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549

llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928

llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:985

llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146

llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075

unsigned

false
Definition: StackSlotColoring.cpp:193

llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPUAddrSpace.h:30

llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPUAddrSpace.h:31

llvm::AMDGPU::isClobberedInFunction
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check if a Load is clobbered in its function.
Definition: AMDGPUMemoryUtils.cpp:389

llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::M68k::MemAddrModeKind::U
@ U

llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:191

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:203

llvm::AMDGPUPromoteKernelArgumentsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPromoteKernelArguments.cpp:208