31#define DEBUG_TYPE "amdgpu-resource-usage" 
   41    "amdgpu-assume-external-call-stack-size",
 
   46    "amdgpu-assume-dynamic-stack-object-size",
 
   47    cl::desc(
"Assumed extra stack use if there are any " 
   48             "variable sized objects (in bytes)"),
 
   52                "Function register usage analysis", 
true, 
true)
 
   56    assert(Op.getImm() == 0);
 
 
   65    if (!UseOp.isImplicit() || !
TII.isFLAT(*UseOp.getParent()))
 
 
   83  uint32_t AssumedStackSizeForDynamicSizeObjects =
 
   90      AssumedStackSizeForDynamicSizeObjects = 0;
 
   92      AssumedStackSizeForExternalCall = 0;
 
   96      MF, AssumedStackSizeForDynamicSizeObjects,
 
   97      AssumedStackSizeForExternalCall);
 
 
  110  uint32_t AssumedStackSizeForDynamicSizeObjects =
 
  117      AssumedStackSizeForDynamicSizeObjects = 0;
 
  119      AssumedStackSizeForExternalCall = 0;
 
  123      MF, AssumedStackSizeForDynamicSizeObjects,
 
  124      AssumedStackSizeForExternalCall);
 
 
  130    uint32_t AssumedStackSizeForExternalCall)
 const {
 
  140  Info.UsesFlatScratch = 
MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
 
  141                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
 
  157    Info.UsesFlatScratch = 
false;
 
  160  Info.PrivateSegmentSize = FrameInfo.getStackSize();
 
  163  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
 
  164  if (Info.HasDynamicallySizedStack)
 
  165    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
 
  168    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
 
  171      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || 
MRI.isPhysRegUsed(AMDGPU::VCC_HI);
 
  172  Info.NumExplicitSGPR = 
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::SGPR_32RegClass,
 
  174  if (ST.hasMAIInsts())
 
  175    Info.NumAGPR = 
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::AGPR_32RegClass,
 
  181  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
 
  182    Info.NumVGPR = 
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::VGPR_32RegClass,
 
  187  int32_t MaxVGPR = -1;
 
  188  Info.CalleeSegmentSize = 0;
 
  192      for (
unsigned I = 0; 
I < 
MI.getNumOperands(); ++
I) {
 
  200        case AMDGPU::NoRegister:
 
  202                 "Instruction uses invalid noreg register");
 
  205        case AMDGPU::XNACK_MASK:
 
  206        case AMDGPU::XNACK_MASK_LO:
 
  207        case AMDGPU::XNACK_MASK_HI:
 
  210        case AMDGPU::LDS_DIRECT:
 
  221        case AMDGPU::SRC_VCCZ:
 
  224        case AMDGPU::SRC_EXECZ:
 
  227        case AMDGPU::SRC_SCC:
 
  235        assert((!RC || 
TRI.isVGPRClass(RC) || 
TRI.isSGPRClass(RC) ||
 
  236                TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
 
  237                AMDGPU::TTMP_64RegClass.contains(Reg) ||
 
  238                AMDGPU::TTMP_128RegClass.contains(Reg) ||
 
  239                AMDGPU::TTMP_256RegClass.contains(Reg) ||
 
  240                AMDGPU::TTMP_512RegClass.contains(Reg)) &&
 
  241               "Unknown register class");
 
  243        if (!RC || !
TRI.isVGPRClass(RC))
 
  246        if (
MI.isCall() || 
MI.isMetaInstruction())
 
  250        unsigned HWReg = 
TRI.getHWRegIndex(Reg);
 
  251        int MaxUsed = HWReg + Width - 1;
 
  252        MaxVGPR = std::max(MaxUsed, MaxVGPR);
 
  260            TII->getNamedOperand(
MI, AMDGPU::OpName::callee);
 
  262        const Function *Callee = getCalleeFunction(*CalleeOp);
 
  275        if (Callee && !isSameFunction(MF, Callee))
 
  276          Info.Callees.push_back(Callee);
 
  278        bool IsIndirect = !Callee || Callee->isDeclaration();
 
  281        if (!Callee || !Callee->doesNotRecurse()) {
 
  282          Info.HasRecursion = 
true;
 
  286          if (!
MI.isReturn()) {
 
  293            Info.CalleeSegmentSize = std::max(
 
  294                Info.CalleeSegmentSize,
 
  295                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
 
  300          Info.CalleeSegmentSize =
 
  301              std::max(Info.CalleeSegmentSize,
 
  302                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
 
  306          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
 
  307          Info.HasDynamicallySizedStack = 
true;
 
  308          Info.HasIndirectCall = 
true;
 
  314  Info.NumVGPR = MaxVGPR + 1;
 
 
unsigned const MachineRegisterInfo * MRI
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
static cl::opt< uint32_t > clAssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
 
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
 
static cl::opt< uint32_t > clAssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
 
Analyzes how many registers and other resources are used by functions.
 
AMD GCN specific subclass of TargetSubtarget.
 
const HexagonInstrInfo * TII
 
Register const TargetRegisterInfo * TRI
 
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
 
Target-Independent Code Generator Pass Configuration Options pass.
 
uint32_t getNumNamedBarriers() const
 
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
 
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo Result
 
bool hasFlatScratchInit() const
 
Module * getParent()
Get the module that this global value is contained inside of...
 
Generic base class for all target subtargets.
 
const Triple & getTargetTriple() const
 
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
 
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
 
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
 
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
 
Function & getFunction()
Return the LLVM function that this machine code represents.
 
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
 
Representation of each machine instruction.
 
MachineOperand class - Representation of each machine instruction operand.
 
bool isReg() const
isReg - Tests if this is a MO_Register operand.
 
Register getReg() const
getReg - Returns the register number.
 
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
 
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information that might be around, for example to update it.
 
Wrapper class representing virtual and physical registers.
 
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
 
GCNUserSGPRUsageInfo & getUserSGPRInfo()
 
bool isStackRealigned() const
 
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
 
Primary interface to the complete machine description for the target machine.
 
OSType getOS() const
Get the parsed operating system type of this triple.
 
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
 
unsigned getAMDHSACodeObjectVersion(const Module &M)
 
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
 
initializer< Ty > init(const Ty &Val)
 
This is an optimization pass for GlobalISel generic memory operations.
 
char & AMDGPUResourceUsageAnalysisID
 
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
 
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
 
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
 
DWARFExpression::Operation Op
 
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
 
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects, uint32_t AssumedStackSizeForExternalCall) const
 
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
 
FunctionResourceInfo ResourceInfo
 
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.