LLVM 20.0.0git
AMDGPUPromoteKernelArguments.cpp
Go to the documentation of this file.
1//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass recursively promotes generic pointer arguments of a kernel
10/// into the global address space.
11///
12/// The pass walks kernel's pointer arguments, then loads from them. If a loaded
13/// value is a pointer and loaded pointer is unmodified in the kernel before the
14/// load, then promote loaded pointer to global. Then recursively continue.
15//
16//===----------------------------------------------------------------------===//
17
#include "AMDGPU.h"
#include "AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
26#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27
28using namespace llvm;
29
30namespace {
31
32class AMDGPUPromoteKernelArguments : public FunctionPass {
33 MemorySSA *MSSA;
34
35 AliasAnalysis *AA;
36
37 Instruction *ArgCastInsertPt;
38
40
41 void enqueueUsers(Value *Ptr);
42
43 bool promotePointer(Value *Ptr);
44
45 bool promoteLoad(LoadInst *LI);
46
47public:
48 static char ID;
49
50 AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51
52 bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53
54 bool runOnFunction(Function &F) override;
55
56 void getAnalysisUsage(AnalysisUsage &AU) const override {
59 AU.setPreservesAll();
60 }
61};
62
63} // end anonymous namespace
64
65void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66 SmallVector<User *> PtrUsers(Ptr->users());
67
68 while (!PtrUsers.empty()) {
69 Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70 if (!U)
71 continue;
72
73 switch (U->getOpcode()) {
74 default:
75 break;
76 case Instruction::Load: {
77 LoadInst *LD = cast<LoadInst>(U);
78 if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79 !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80 Ptrs.push_back(LD);
81
82 break;
83 }
84 case Instruction::GetElementPtr:
85 case Instruction::AddrSpaceCast:
86 case Instruction::BitCast:
87 if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88 PtrUsers.append(U->user_begin(), U->user_end());
89 break;
90 }
91 }
92}
93
94bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95 bool Changed = false;
96
97 LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98 if (LI)
99 Changed |= promoteLoad(LI);
100
101 PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102 if (!PT)
103 return Changed;
104
105 if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106 PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107 PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108 enqueueUsers(Ptr);
109
110 if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111 return Changed;
112
113 IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114 : ArgCastInsertPt);
115
116 // Cast pointer to global address space and back to flat and let
117 // Infer Address Spaces pass to do all necessary rewriting.
118 PointerType *NewPT =
119 PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
120 Value *Cast =
121 B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122 Value *CastBack =
123 B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124 Ptr->replaceUsesWithIf(CastBack,
125 [Cast](Use &U) { return U.getUser() != Cast; });
126
127 return true;
128}
129
130bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131 if (!LI->isSimple())
132 return false;
133
134 LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135 return true;
136}
137
138// skip allocas
141 for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142 AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143
144 // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145 // so loads will need to be inserted before it.
146 if (!AI || !AI->isStaticAlloca())
147 break;
148 }
149
150 return InsPt;
151}
152
153bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154 AliasAnalysis &AA) {
155 if (skipFunction(F))
156 return false;
157
158 CallingConv::ID CC = F.getCallingConv();
159 if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160 return false;
161
162 ArgCastInsertPt = &*getInsertPt(*F.begin());
163 this->MSSA = &MSSA;
164 this->AA = &AA;
165
166 for (Argument &Arg : F.args()) {
167 if (Arg.use_empty())
168 continue;
169
170 PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171 if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174 continue;
175
176 Ptrs.push_back(&Arg);
177 }
178
179 bool Changed = false;
180 while (!Ptrs.empty()) {
181 Value *Ptr = Ptrs.pop_back_val();
182 Changed |= promotePointer(Ptr);
183 }
184
185 return Changed;
186}
187
188bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191 return run(F, MSSA, AA);
192}
193
194INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195 "AMDGPU Promote Kernel Arguments", false, false)
198INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199 "AMDGPU Promote Kernel Arguments", false, false)
200
201char AMDGPUPromoteKernelArguments::ID = 0;
202
204 return new AMDGPUPromoteKernelArguments();
205}
206
210 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
212 if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
216 return PA;
217 }
218 return PreservedAnalyses::all();
219}
AMDGPU Promote Kernel Arguments
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
#define DEBUG_TYPE
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define F(x, y, z)
Definition: MD5.cpp:55
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/def graph.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the SmallVector class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
an instruction to allocate memory on the stack
Definition: Instructions.h:63
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
Definition: BasicBlock.cpp:416
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
An instruction for reading from memory.
Definition: Instructions.h:176
bool isSimple() const
Definition: Instructions.h:247
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:985
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check is a Load is clobbered in its function.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)