doxygen/AMDGPUAlwaysInlinePass_8cpp_source.html

//===-- AMDGPUAlwaysInlinePass.cpp - Force Inlining -----------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

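/// This pass replaces uses of function aliases with their aliasees and marks
/// functions as always_inline: every non-entry function that (transitively)
/// uses a region-address global, or a local-address (LDS) global when module
/// LDS lowering is disabled, and every used function definition when function
/// calls are disabled. With -amdgpu-stress-function-calls the remaining used
/// function definitions are marked noinline instead.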

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUTargetMachine.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/CodeGen/CommandFlags.h"

#include "llvm/IR/Module.h"

#include "llvm/Pass.h"

#include "llvm/Support/CommandLine.h"


using namespace llvm;


/// Hidden testing knob, off by default. When set, alwaysInlineImpl marks used
/// function definitions as noinline unless they were already selected for
/// forced inlining.
static cl::opt<bool> StressCalls("amdgpu-stress-function-calls", cl::Hidden,
                                 cl::desc("Force all functions to be noinline"),
                                 cl::init(false));

namespace {

/// Legacy pass manager module pass; runOnModule forwards to alwaysInlineImpl.
class AMDGPUAlwaysInline : public ModulePass {
public:
  /// Pass identifier; only its address is used by the pass infrastructure.
  static char ID;

  /// \param GlobalOpt Forwarded unchanged to alwaysInlineImpl on every run.
  AMDGPUAlwaysInline(bool GlobalOpt = false)
      : ModulePass(ID), GlobalOpt(GlobalOpt) {}

  // NOTE(review): this declares every analysis preserved although the pass
  // can rewrite alias uses and function attributes, whereas the new pass
  // manager entry point reports PreservedAnalyses::none() on change. Confirm
  // this is intentional.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

  bool runOnModule(Module &M) override;

private:
  /// Stored constructor argument, handed to alwaysInlineImpl.
  bool GlobalOpt;
};

} // End anonymous namespace


INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",

                "AMDGPU Inline All Functions", false, false)


char AMDGPUAlwaysInline::ID = 0;


/// Collect every non-entry function that transitively uses \p GV.
///
/// Walks the use graph starting at the users of \p GV with an explicit
/// worklist. Whenever a user is an instruction, its parent function is
/// recorded in \p FuncsToAlwaysInline (unless it has an entry-function calling
/// convention) and the function itself is queued so that its own users, i.e.
/// its callers, are processed as well. Non-instruction users are simply
/// traversed through.
///
/// \param GV Global whose users are walked.
/// \param FuncsToAlwaysInline Output set receiving the collected functions.
///
/// Side effect: the NoInline attribute is removed from every collected
/// function.
static void
recursivelyVisitUsers(GlobalValue &GV,
                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallPtrSet<const Value *, 8> Seen;
  SmallVector<User *, 16> Worklist(GV.users());

  while (!Worklist.empty()) {
    User *Current = Worklist.pop_back_val();

    // Each user is processed at most once.
    if (!Seen.insert(Current).second)
      continue;

    Instruction *Inst = dyn_cast<Instruction>(Current);
    if (!Inst) {
      // Not an instruction: keep following the use chain.
      append_range(Worklist, Current->users());
      continue;
    }

    // No need to look at further users of the instruction itself, but we do
    // need to inline any callers of the function containing it.
    Function *Parent = Inst->getFunction();
    if (AMDGPU::isEntryFunctionCC(Parent->getCallingConv()))
      continue;

    // FIXME: This is a horrible hack. We should always respect noinline,
    // and just let us hit the error when we can't handle this.
    //
    // Unfortunately, clang adds noinline to all functions at -O0. We have
    // to override this here until that's fixed.
    Parent->removeFnAttr(Attribute::NoInline);

    FuncsToAlwaysInline.insert(Parent);
    Worklist.push_back(Parent);
  }
}


/// Shared implementation behind both pass manager entry points.
///
/// Runs three phases over \p M:
///  1. Replace the uses of function aliases with the aliased function.
///  2. Collect the non-entry functions that use region-address globals, or
///     local-address globals while module LDS lowering is disabled.
///  3. When function calls are disabled or the stress option is set, apply a
///     blanket inline policy to every used function definition.
/// The collected functions then receive the AlwaysInline or NoInline
/// attribute.
///
/// \param M Module to update in place.
/// \param GlobalOpt When true, aliases whose uses were replaced are also
///        erased from the module.
/// \returns true if any alias was rewritten or any function was selected for
///          an inline attribute.
static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  Triple TT(M.getTargetTriple());
  bool Changed = false;

  // Phase 1: point users of a function alias directly at the aliasee.
  std::vector<GlobalAlias *> ResolvedAliases;
  for (GlobalAlias &Alias : M.aliases()) {
    Function *Aliasee = dyn_cast<Function>(Alias.getAliasee());

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
    if (!Aliasee)
      continue;

    // For amdgcn triples only aliases with internal linkage are rewritten.
    if (TT.isAMDGCN() && Alias.getLinkage() != GlobalValue::InternalLinkage)
      continue;

    Changed = true;
    Alias.replaceAllUsesWith(Aliasee);
    ResolvedAliases.push_back(&Alias);
  }

  // The rewritten aliases are only deleted when the caller asked for it.
  if (GlobalOpt)
    for (GlobalAlias *Alias : ResolvedAliases)
      Alias->eraseFromParent();

  // Phase 2: always force inlining of any function that uses an LDS global
  // address. This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.
  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  for (GlobalVariable &GV : M.globals()) {
    const unsigned AddrSpace = GV.getAddressSpace();
    const bool IsRegion = AddrSpace == AMDGPUAS::REGION_ADDRESS;
    const bool IsLocalWithoutModuleLowering =
        AddrSpace == AMDGPUAS::LOCAL_ADDRESS &&
        !AMDGPUTargetMachine::EnableLowerModuleLDS;

    if (IsRegion || IsLocalWithoutModuleLowering)
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  // Phase 3: blanket policy for every used function definition.
  SmallPtrSet<Function *, 8> FuncsToNoInline;
  const bool Stress = StressCalls;
  if (Stress || !AMDGPUTargetMachine::EnableFunctionCalls) {
    // Functions already carrying the opposite attribute are left alone.
    const auto ConflictingAttr =
        Stress ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &Fn : M) {
      if (Fn.isDeclaration() || Fn.use_empty() ||
          Fn.hasFnAttribute(ConflictingAttr))
        continue;

      if (!Stress) {
        FuncsToAlwaysInline.insert(&Fn);
        continue;
      }

      // Stress mode: functions selected in phase 2 still have to be inlined.
      if (!FuncsToAlwaysInline.count(&Fn))
        FuncsToNoInline.insert(&Fn);
    }
  }

  for (Function *Fn : FuncsToAlwaysInline)
    Fn->addFnAttr(Attribute::AlwaysInline);

  for (Function *Fn : FuncsToNoInline)
    Fn->addFnAttr(Attribute::NoInline);

  if (Changed)
    return true;
  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}


/// Legacy pass manager entry point: runs alwaysInlineImpl on \p M with the
/// GlobalOpt value stored at construction and returns its result.
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  const bool ModuleChanged = alwaysInlineImpl(M, GlobalOpt);
  return ModuleChanged;
}


/// Factory for the legacy pass.
///
/// \param GlobalOpt Forwarded to the AMDGPUAlwaysInline constructor.
/// \returns A newly allocated pass instance.
ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  ModulePass *NewPass = new AMDGPUAlwaysInline(GlobalOpt);
  return NewPass;
}


/// New pass manager entry point.
///
/// Runs alwaysInlineImpl on \p M. \p AM is not used.
/// \returns PreservedAnalyses::all() when nothing changed, otherwise
///          PreservedAnalyses::none().
PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  if (!alwaysInlineImpl(M, GlobalOpt))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}


alwaysInlineImpl
static bool alwaysInlineImpl(Module &M, bool GlobalOpt)
Definition AMDGPUAlwaysInlinePass.cpp:89

recursivelyVisitUsers
static INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", "AMDGPU Inline All Functions", false, false) char AMDGPUAlwaysInline void recursivelyVisitUsers(GlobalValue &GV, SmallPtrSetImpl< Function * > &FuncsToAlwaysInline)
Definition AMDGPUAlwaysInlinePass.cpp:56

AMDGPUBaseInfo.h

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPU.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

CommandFlags.h

CommandLine.h

Module.h
Module.h This file contains the declarations for the Module class.

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

Module
Machine Check Debug Module
Definition MachineCheckDebugify.cpp:124

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

Pass.h

char

llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition AMDGPUTargetMachine.h:38

llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition AMDGPUTargetMachine.h:39

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition PassAnalysisSupport.h:131

llvm::Function
Definition Function.h:64

llvm::GlobalAlias
Definition GlobalAlias.h:29

llvm::GlobalValue
Definition GlobalValue.h:49

llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60

llvm::GlobalVariable
Definition GlobalVariable.h:41

llvm::Instruction
Definition Instruction.h:69

llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition SmallPtrSet.h:98

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition SmallPtrSet.h:368

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition SmallPtrSet.h:455

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:389

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition SmallPtrSet.h:527

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1205

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::User
Definition User.h:44

llvm::cl::opt
Definition CommandLine.h:1454

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition AMDGPUAddrSpace.h:36

llvm::AMDGPU::isEntryFunctionCC
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1513

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198

llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition AMDGPUAlwaysInlinePass.cpp:164

llvm::ModuleAnalysisManager
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39

llvm::AMDGPUAlwaysInlinePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition AMDGPUAlwaysInlinePass.cpp:168

llvm::cl::desc
Definition CommandLine.h:410